author     bunnei      2021-07-25 11:39:04 -0700
committer  GitHub      2021-07-25 11:39:04 -0700
commit     98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch)
tree       816faa96c2c4d291825063433331a8ea4b3d08f1 /src/video_core
parent     Merge pull request #6699 from lat9nq/common-threads (diff)
parent     shader: Support out of bound local memory reads and immediate writes (diff)
Merge pull request #6585 from ameerj/hades
Shader Decompiler Rewrite
Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/CMakeLists.txt | 80
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 269
-rw-r--r--  src/video_core/dirty_flags.cpp | 6
-rw-r--r--  src/video_core/dirty_flags.h | 2
-rw-r--r--  src/video_core/engines/const_buffer_engine_interface.h | 103
-rw-r--r--  src/video_core/engines/kepler_compute.cpp | 45
-rw-r--r--  src/video_core/engines/kepler_compute.h | 21
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 39
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 46
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 3
-rw-r--r--  src/video_core/engines/shader_bytecode.h | 2298
-rw-r--r--  src/video_core/engines/shader_header.h | 158
-rw-r--r--  src/video_core/engines/shader_type.h | 21
-rw-r--r--  src/video_core/guest_driver.cpp | 37
-rw-r--r--  src/video_core/guest_driver.h | 46
-rw-r--r--  src/video_core/memory_manager.cpp | 1
-rw-r--r--  src/video_core/rasterizer_interface.h | 16
-rw-r--r--  src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 2124
-rw-r--r--  src/video_core/renderer_opengl/gl_arb_decompiler.h | 29
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 84
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 57
-rw-r--r--  src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 209
-rw-r--r--  src/video_core/renderer_opengl/gl_compute_pipeline.h | 93
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp | 146
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h | 84
-rw-r--r--  src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 572
-rw-r--r--  src/video_core/renderer_opengl/gl_graphics_pipeline.h | 169
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 448
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 44
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.cpp | 27
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.h | 14
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 994
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 172
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_context.h | 33
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2986
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.h | 69
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 482
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.h | 176
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.cpp | 146
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.h | 185
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_util.cpp | 123
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_util.h | 89
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.cpp | 6
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.h | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 361
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h | 51
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h | 108
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 60
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.h | 6
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.cpp | 23
-rw-r--r--  src/video_core/renderer_vulkan/blit_image.cpp | 40
-rw-r--r--  src/video_core/renderer_vulkan/blit_image.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 92
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h | 79
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 54
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h | 7
-rw-r--r--  src/video_core/renderer_vulkan/pipeline_helper.h | 154
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 60
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 94
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 68
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 22
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 270
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h | 34
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 296
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.h | 72
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | 172
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.h | 70
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 839
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 145
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.h | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 867
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 176
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 475
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 59
-rw-r--r--  src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | 96
-rw-r--r--  src/video_core/renderer_vulkan/vk_render_pass_cache.h | 55
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.cpp | 12
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.h | 12
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 172
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 38
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 3166
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h | 99
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp | 56
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.h | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 59
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h | 31
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 243
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h | 80
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 13
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h | 4
-rw-r--r--  src/video_core/shader/ast.cpp | 752
-rw-r--r--  src/video_core/shader/ast.h | 398
-rw-r--r--  src/video_core/shader/async_shaders.cpp | 234
-rw-r--r--  src/video_core/shader/async_shaders.h | 138
-rw-r--r--  src/video_core/shader/compiler_settings.cpp | 26
-rw-r--r--  src/video_core/shader/compiler_settings.h | 26
-rw-r--r--  src/video_core/shader/control_flow.cpp | 751
-rw-r--r--  src/video_core/shader/control_flow.h | 117
-rw-r--r--  src/video_core/shader/decode.cpp | 368
-rw-r--r--  src/video_core/shader/decode/arithmetic.cpp | 166
-rw-r--r--  src/video_core/shader/decode/arithmetic_half.cpp | 101
-rw-r--r--  src/video_core/shader/decode/arithmetic_half_immediate.cpp | 54
-rw-r--r--  src/video_core/shader/decode/arithmetic_immediate.cpp | 53
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer.cpp | 375
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 99
-rw-r--r--  src/video_core/shader/decode/bfe.cpp | 77
-rw-r--r--  src/video_core/shader/decode/bfi.cpp | 45
-rw-r--r--  src/video_core/shader/decode/conversion.cpp | 321
-rw-r--r--  src/video_core/shader/decode/ffma.cpp | 62
-rw-r--r--  src/video_core/shader/decode/float_set.cpp | 58
-rw-r--r--  src/video_core/shader/decode/float_set_predicate.cpp | 57
-rw-r--r--  src/video_core/shader/decode/half_set.cpp | 115
-rw-r--r--  src/video_core/shader/decode/half_set_predicate.cpp | 80
-rw-r--r--  src/video_core/shader/decode/hfma2.cpp | 73
-rw-r--r--  src/video_core/shader/decode/image.cpp | 536
-rw-r--r--  src/video_core/shader/decode/integer_set.cpp | 49
-rw-r--r--  src/video_core/shader/decode/integer_set_predicate.cpp | 53
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 493
-rw-r--r--  src/video_core/shader/decode/other.cpp | 322
-rw-r--r--  src/video_core/shader/decode/predicate_set_predicate.cpp | 68
-rw-r--r--  src/video_core/shader/decode/predicate_set_register.cpp | 46
-rw-r--r--  src/video_core/shader/decode/register_set_predicate.cpp | 86
-rw-r--r--  src/video_core/shader/decode/shift.cpp | 153
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 935
-rw-r--r--  src/video_core/shader/decode/video.cpp | 169
-rw-r--r--  src/video_core/shader/decode/warp.cpp | 117
-rw-r--r--  src/video_core/shader/decode/xmad.cpp | 156
-rw-r--r--  src/video_core/shader/expr.cpp | 93
-rw-r--r--  src/video_core/shader/expr.h | 156
-rw-r--r--  src/video_core/shader/memory_util.cpp | 76
-rw-r--r--  src/video_core/shader/memory_util.h | 43
-rw-r--r--  src/video_core/shader/node.h | 701
-rw-r--r--  src/video_core/shader/node_helper.cpp | 115
-rw-r--r--  src/video_core/shader/node_helper.h | 71
-rw-r--r--  src/video_core/shader/registry.cpp | 181
-rw-r--r--  src/video_core/shader/registry.h | 172
-rw-r--r--  src/video_core/shader/shader_ir.cpp | 464
-rw-r--r--  src/video_core/shader/shader_ir.h | 479
-rw-r--r--  src/video_core/shader/track.cpp | 236
-rw-r--r--  src/video_core/shader/transform_feedback.cpp | 115
-rw-r--r--  src/video_core/shader/transform_feedback.h | 23
-rw-r--r--  src/video_core/shader_cache.cpp | 250
-rw-r--r--  src/video_core/shader_cache.h | 215
-rw-r--r--  src/video_core/shader_environment.cpp | 460
-rw-r--r--  src/video_core/shader_environment.h | 183
-rw-r--r--  src/video_core/shader_notify.cpp | 51
-rw-r--r--  src/video_core/shader_notify.h | 28
-rw-r--r--  src/video_core/texture_cache/formatter.cpp | 4
-rw-r--r--  src/video_core/texture_cache/formatter.h | 3
-rw-r--r--  src/video_core/texture_cache/image_view_base.cpp | 9
-rw-r--r--  src/video_core/texture_cache/image_view_base.h | 1
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 35
-rw-r--r--  src/video_core/textures/texture.h | 9
-rw-r--r--  src/video_core/transform_feedback.cpp | 99
-rw-r--r--  src/video_core/transform_feedback.h | 30
-rw-r--r--  src/video_core/vulkan_common/nsight_aftermath_tracker.cpp | 7
-rw-r--r--  src/video_core/vulkan_common/nsight_aftermath_tracker.h | 21
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp | 362
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.h | 161
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.cpp | 5
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.h | 44
163 files changed, 7358 insertions, 26949 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e4de55f4d..007ecc13e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,7 +29,6 @@ add_library(video_core STATIC
     dirty_flags.h
     dma_pusher.cpp
     dma_pusher.h
-    engines/const_buffer_engine_interface.h
     engines/const_buffer_info.h
     engines/engine_interface.h
     engines/engine_upload.cpp
@@ -44,9 +43,6 @@ add_library(video_core STATIC
     engines/maxwell_3d.h
     engines/maxwell_dma.cpp
     engines/maxwell_dma.h
-    engines/shader_bytecode.h
-    engines/shader_header.h
-    engines/shader_type.h
     framebuffer_config.h
     macro/macro.cpp
     macro/macro.h
@@ -61,8 +57,6 @@ add_library(video_core STATIC
     gpu.h
     gpu_thread.cpp
     gpu_thread.h
-    guest_driver.cpp
-    guest_driver.h
     memory_manager.cpp
     memory_manager.h
     query_cache.h
@@ -71,26 +65,25 @@ add_library(video_core STATIC
     rasterizer_interface.h
     renderer_base.cpp
     renderer_base.h
-    renderer_opengl/gl_arb_decompiler.cpp
-    renderer_opengl/gl_arb_decompiler.h
     renderer_opengl/gl_buffer_cache.cpp
     renderer_opengl/gl_buffer_cache.h
+    renderer_opengl/gl_compute_pipeline.cpp
+    renderer_opengl/gl_compute_pipeline.h
     renderer_opengl/gl_device.cpp
     renderer_opengl/gl_device.h
     renderer_opengl/gl_fence_manager.cpp
     renderer_opengl/gl_fence_manager.h
+    renderer_opengl/gl_graphics_pipeline.cpp
+    renderer_opengl/gl_graphics_pipeline.h
     renderer_opengl/gl_rasterizer.cpp
     renderer_opengl/gl_rasterizer.h
     renderer_opengl/gl_resource_manager.cpp
     renderer_opengl/gl_resource_manager.h
     renderer_opengl/gl_shader_cache.cpp
     renderer_opengl/gl_shader_cache.h
-    renderer_opengl/gl_shader_decompiler.cpp
-    renderer_opengl/gl_shader_decompiler.h
-    renderer_opengl/gl_shader_disk_cache.cpp
-    renderer_opengl/gl_shader_disk_cache.h
     renderer_opengl/gl_shader_manager.cpp
     renderer_opengl/gl_shader_manager.h
+    renderer_opengl/gl_shader_context.h
     renderer_opengl/gl_shader_util.cpp
     renderer_opengl/gl_shader_util.h
     renderer_opengl/gl_state_tracker.cpp
@@ -112,6 +105,7 @@ add_library(video_core STATIC
     renderer_vulkan/fixed_pipeline_state.h
     renderer_vulkan/maxwell_to_vk.cpp
     renderer_vulkan/maxwell_to_vk.h
+    renderer_vulkan/pipeline_helper.h
     renderer_vulkan/renderer_vulkan.h
     renderer_vulkan/renderer_vulkan.cpp
     renderer_vulkan/vk_blit_screen.cpp
@@ -138,12 +132,12 @@ add_library(video_core STATIC
     renderer_vulkan/vk_query_cache.h
     renderer_vulkan/vk_rasterizer.cpp
     renderer_vulkan/vk_rasterizer.h
+    renderer_vulkan/vk_render_pass_cache.cpp
+    renderer_vulkan/vk_render_pass_cache.h
     renderer_vulkan/vk_resource_pool.cpp
     renderer_vulkan/vk_resource_pool.h
     renderer_vulkan/vk_scheduler.cpp
     renderer_vulkan/vk_scheduler.h
-    renderer_vulkan/vk_shader_decompiler.cpp
-    renderer_vulkan/vk_shader_decompiler.h
     renderer_vulkan/vk_shader_util.cpp
     renderer_vulkan/vk_shader_util.h
     renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -156,60 +150,12 @@ add_library(video_core STATIC
     renderer_vulkan/vk_texture_cache.h
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
+    shader_cache.cpp
     shader_cache.h
+    shader_environment.cpp
+    shader_environment.h
     shader_notify.cpp
     shader_notify.h
-    shader/decode/arithmetic.cpp
-    shader/decode/arithmetic_immediate.cpp
-    shader/decode/bfe.cpp
-    shader/decode/bfi.cpp
-    shader/decode/shift.cpp
-    shader/decode/arithmetic_integer.cpp
-    shader/decode/arithmetic_integer_immediate.cpp
-    shader/decode/arithmetic_half.cpp
-    shader/decode/arithmetic_half_immediate.cpp
-    shader/decode/ffma.cpp
-    shader/decode/hfma2.cpp
-    shader/decode/conversion.cpp
-    shader/decode/memory.cpp
-    shader/decode/texture.cpp
-    shader/decode/image.cpp
-    shader/decode/float_set_predicate.cpp
-    shader/decode/integer_set_predicate.cpp
-    shader/decode/half_set_predicate.cpp
-    shader/decode/predicate_set_register.cpp
-    shader/decode/predicate_set_predicate.cpp
-    shader/decode/register_set_predicate.cpp
-    shader/decode/float_set.cpp
-    shader/decode/integer_set.cpp
-    shader/decode/half_set.cpp
-    shader/decode/video.cpp
-    shader/decode/warp.cpp
-    shader/decode/xmad.cpp
-    shader/decode/other.cpp
-    shader/ast.cpp
-    shader/ast.h
-    shader/async_shaders.cpp
-    shader/async_shaders.h
-    shader/compiler_settings.cpp
-    shader/compiler_settings.h
-    shader/control_flow.cpp
-    shader/control_flow.h
-    shader/decode.cpp
-    shader/expr.cpp
-    shader/expr.h
-    shader/memory_util.cpp
-    shader/memory_util.h
-    shader/node_helper.cpp
-    shader/node_helper.h
-    shader/node.h
-    shader/registry.cpp
-    shader/registry.h
-    shader/shader_ir.cpp
-    shader/shader_ir.h
-    shader/track.cpp
-    shader/transform_feedback.cpp
-    shader/transform_feedback.h
     surface.cpp
     surface.h
     texture_cache/accelerated_swizzle.cpp
@@ -242,6 +188,8 @@ add_library(video_core STATIC
     textures/decoders.h
     textures/texture.cpp
     textures/texture.h
+    transform_feedback.cpp
+    transform_feedback.h
    video_core.cpp
    video_core.h
    vulkan_common/vulkan_debug_callback.cpp
@@ -265,7 +213,7 @@ add_library(video_core STATIC
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad xbyak)
+target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak)
 
 if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
     add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 5a0b6f0c0..24c858104 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -31,6 +31,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/surface.h"
 #include "video_core/texture_cache/slot_vector.h"
 #include "video_core/texture_cache/types.h"
 
@@ -42,14 +43,19 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory);
 
 using BufferId = SlotId;
 
+using VideoCore::Surface::PixelFormat;
+using namespace Common::Literals;
+
 constexpr u32 NUM_VERTEX_BUFFERS = 32;
 constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
 constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
 constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
 constexpr u32 NUM_STORAGE_BUFFERS = 16;
+constexpr u32 NUM_TEXTURE_BUFFERS = 16;
 constexpr u32 NUM_STAGES = 5;
 
-using namespace Common::Literals;
+using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
+using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
 
 template <typename P>
 class BufferCache {
@@ -67,6 +73,7 @@ class BufferCache {
     static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
     static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
+    static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
 
     static constexpr BufferId NULL_BUFFER_ID{0};
 
@@ -96,6 +103,10 @@ class BufferCache {
         BufferId buffer_id;
     };
 
+    struct TextureBufferBinding : Binding {
+        PixelFormat format;
+    };
+
     static constexpr Binding NULL_BINDING{
         .cpu_addr = 0,
         .size = 0,
@@ -133,20 +144,31 @@ public:
 
     void BindHostComputeBuffers();
 
-    void SetEnabledUniformBuffers(size_t stage, u32 enabled);
+    void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+                                const UniformBufferSizes* sizes);
 
-    void SetEnabledComputeUniformBuffers(u32 enabled);
+    void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
 
     void UnbindGraphicsStorageBuffers(size_t stage);
 
     void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
                                    bool is_written);
 
+    void UnbindGraphicsTextureBuffers(size_t stage);
+
+    void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
                                   PixelFormat format, bool is_written, bool is_image);
+
     void UnbindComputeStorageBuffers();
 
     void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
                                   bool is_written);
 
+    void UnbindComputeTextureBuffers();
+
+    void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
+                                  bool is_written, bool is_image);
+
     void FlushCachedWrites();
 
     /// Return true when there are uncommitted buffers to be downloaded
@@ -178,6 +200,7 @@ public:
     [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
 
     std::mutex mutex;
+    Runtime& runtime;
 
 private:
     template <typename Func>
@@ -254,12 +277,16 @@ private:
 
     void BindHostGraphicsStorageBuffers(size_t stage);
 
+    void BindHostGraphicsTextureBuffers(size_t stage);
+
     void BindHostTransformFeedbackBuffers();
 
     void BindHostComputeUniformBuffers();
 
     void BindHostComputeStorageBuffers();
 
+    void BindHostComputeTextureBuffers();
+
     void DoUpdateGraphicsBuffers(bool is_indexed);
 
     void DoUpdateComputeBuffers();
@@ -274,6 +301,8 @@ private:
 
     void UpdateStorageBuffers(size_t stage);
 
+    void UpdateTextureBuffers(size_t stage);
+
     void UpdateTransformFeedbackBuffers();
 
     void UpdateTransformFeedbackBuffer(u32 index);
@@ -282,6 +311,8 @@ private:
 
     void UpdateComputeStorageBuffers();
 
+    void UpdateComputeTextureBuffers();
+
     void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
 
     [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
@@ -323,6 +354,9 @@ private:
 
     [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
 
+    [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
+                                                               PixelFormat format);
+
     [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
 
     [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
@@ -336,7 +370,6 @@ private:
     Tegra::Engines::KeplerCompute& kepler_compute;
     Tegra::MemoryManager& gpu_memory;
     Core::Memory::Memory& cpu_memory;
-    Runtime& runtime;
 
     SlotVector<Buffer> slot_buffers;
     DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -347,20 +380,30 @@ private:
     std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
     std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
     std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
+    std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
     std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
 
     std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
     std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
+    std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
+
+    std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
+    u32 enabled_compute_uniform_buffer_mask = 0;
 
-    std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
-    u32 enabled_compute_uniform_buffers = 0;
+    const UniformBufferSizes* uniform_buffer_sizes{};
+    const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
 
     std::array<u32, NUM_STAGES> enabled_storage_buffers{};
     std::array<u32, NUM_STAGES> written_storage_buffers{};
     u32 enabled_compute_storage_buffers = 0;
     u32 written_compute_storage_buffers = 0;
 
-    std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
+    std::array<u32, NUM_STAGES> enabled_texture_buffers{};
+    std::array<u32, NUM_STAGES> written_texture_buffers{};
+    std::array<u32, NUM_STAGES> image_texture_buffers{};
+    u32 enabled_compute_texture_buffers = 0;
+    u32 written_compute_texture_buffers = 0;
+    u32 image_compute_texture_buffers = 0;
 
     std::array<u32, 16> uniform_cache_hits{};
     std::array<u32, 16> uniform_cache_shots{};
@@ -371,6 +414,10 @@ private:
 
     std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
         dirty_uniform_buffers{};
+    std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
+    std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
+                       std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
+        uniform_buffer_binding_sizes{};
 
     std::vector<BufferId> cached_write_buffer_ids;
 
@@ -394,8 +441,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
                             Tegra::Engines::KeplerCompute& kepler_compute_,
                             Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
                             Runtime& runtime_)
-    : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
-      gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
+    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
     deletion_iterator = slot_buffers.end();
@@ -615,6 +662,7 @@ void BufferCache<P>::BindHostStageBuffers(size_t stage) {
     MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
     BindHostGraphicsUniformBuffers(stage);
     BindHostGraphicsStorageBuffers(stage);
+    BindHostGraphicsTextureBuffers(stage);
 }
 
 template <class P>
@@ -622,21 +670,30 @@ void BufferCache<P>::BindHostComputeBuffers() {
     MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
     BindHostComputeUniformBuffers();
     BindHostComputeStorageBuffers();
+    BindHostComputeTextureBuffers();
 }
 
 template <class P>
-void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) {
+void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+                                            const UniformBufferSizes* sizes) {
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
-        if (enabled_uniform_buffers[stage] != enabled) {
-            dirty_uniform_buffers[stage] = ~u32{0};
+        if (enabled_uniform_buffer_masks != mask) {
+            if constexpr (IS_OPENGL) {
+                fast_bound_uniform_buffers.fill(0);
+            }
+            dirty_uniform_buffers.fill(~u32{0});
+            uniform_buffer_binding_sizes.fill({});
         }
     }
-    enabled_uniform_buffers[stage] = enabled;
+    enabled_uniform_buffer_masks = mask;
+    uniform_buffer_sizes = sizes;
 }
 
 template <class P>
-void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
-    enabled_compute_uniform_buffers = enabled;
+void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
+                                                  const ComputeUniformBufferSizes* sizes) {
+    enabled_compute_uniform_buffer_mask = mask;
+    compute_uniform_buffer_sizes = sizes;
 }
 
 template <class P>
@@ -657,9 +714,29 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
 }
 
 template <class P>
+void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
+    enabled_texture_buffers[stage] = 0;
+    written_texture_buffers[stage] = 0;
+    image_texture_buffers[stage] = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
+                                               u32 size, PixelFormat format, bool is_written,
+                                               bool is_image) {
+    enabled_texture_buffers[stage] |= 1U << tbo_index;
+    written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
+    if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+        image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
+    }
+    texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
+}
+
+template <class P>
 void BufferCache<P>::UnbindComputeStorageBuffers() {
     enabled_compute_storage_buffers = 0;
     written_compute_storage_buffers = 0;
+    image_compute_texture_buffers = 0;
 }
 
 template <class P>
@@ -677,6 +754,24 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
 }
 
 template <class P>
+void BufferCache<P>::UnbindComputeTextureBuffers() {
+    enabled_compute_texture_buffers = 0;
+    written_compute_texture_buffers = 0;
+    image_compute_texture_buffers = 0;
+}
+
+template <class P>
+void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
+                                              PixelFormat format, bool is_written, bool is_image) {
+    enabled_compute_texture_buffers |= 1U << tbo_index;
+    written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
+    if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+        image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
+    }
+    compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
+}
+
+template <class P>
 void BufferCache<P>::FlushCachedWrites() {
     for (const BufferId buffer_id : cached_write_buffer_ids) {
         slot_buffers[buffer_id].FlushCachedWrites();
@@ -901,7 +996,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
         dirty = std::exchange(dirty_uniform_buffers[stage], 0);
     }
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
         const bool needs_bind = ((dirty >> index) & 1) != 0;
         BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
         if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -915,7 +1010,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
                                                    bool needs_bind) {
     const Binding& binding = uniform_buffers[stage][index];
     const VAddr cpu_addr = binding.cpu_addr;
-    const u32 size = binding.size;
+    const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
     Buffer& buffer = slot_buffers[binding.buffer_id];
     TouchBuffer(buffer);
     const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
@@ -925,8 +1020,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     if constexpr (IS_OPENGL) {
         if (runtime.HasFastBufferSubData()) {
             // Fast path for Nvidia
-            if (!HasFastUniformBufferBound(stage, binding_index)) {
+            const bool should_fast_bind =
+                !HasFastUniformBufferBound(stage, binding_index) ||
+                uniform_buffer_binding_sizes[stage][binding_index] != size;
+            if (should_fast_bind) {
                 // We only have to bind when the currently bound buffer is not the fast version
+                fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+                uniform_buffer_binding_sizes[stage][binding_index] = size;
                 runtime.BindFastUniformBuffer(stage, binding_index, size);
             }
             const auto span = ImmediateBufferWithData(cpu_addr, size);
@@ -934,8 +1034,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
             return;
         }
     }
-    fast_bound_uniform_buffers[stage] |= 1U << binding_index;
-
+    if constexpr (IS_OPENGL) {
+        fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+        uniform_buffer_binding_sizes[stage][binding_index] = size;
+    }
     // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
     const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
     cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
@@ -948,14 +1050,27 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     }
     ++uniform_cache_shots[0];
 
-    if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
-        // Skip binding if it's not needed and if the bound buffer is not the fast version
-        // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+    // Skip binding if it's not needed and if the bound buffer is not the fast version
+    // This exists to avoid instances where the fast buffer is bound and a GPU write happens
+    needs_bind |= HasFastUniformBufferBound(stage, binding_index);
+    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+        needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
+    }
+    if (!needs_bind) {
        return;
    }
-    fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
-
     const u32 offset = buffer.Offset(cpu_addr);
+    if constexpr (IS_OPENGL) {
+        // Fast buffer will be unbound
+        fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
+
+        // Mark the index as dirty if offset doesn't match
+        const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
+        dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
+    }
+    if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+        uniform_buffer_binding_sizes[stage][binding_index] = size;
+    }
     if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
         runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
     } else {
@@ -985,6 +1100,28 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
 }
 
 template <class P>
+void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
+    ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
+        const TextureBufferBinding& binding = texture_buffers[stage][index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const PixelFormat format = binding.format;
+        if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+            if (((image_texture_buffers[stage] >> index) & 1) != 0) {
+                runtime.BindImageBuffer(buffer, offset, size, format);
+            } else {
+                runtime.BindTextureBuffer(buffer, offset, size, format);
+            }
+        } else {
+            runtime.BindTextureBuffer(buffer, offset, size, format);
+        }
+    });
+}
+
+template <class P>
 void BufferCache<P>::BindHostTransformFeedbackBuffers() {
     if (maxwell3d.regs.tfb_enabled == 0) {
         return;
@@ -1006,13 +1143,14 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
         // Mark all uniform buffers as dirty
         dirty_uniform_buffers.fill(~u32{0});
+        fast_bound_uniform_buffers.fill(0);
     }
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         const Binding& binding = compute_uniform_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer);
-        const u32 size = binding.size;
+        const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -1047,6 +1185,28 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
 }
 
 template <class P>
+void BufferCache<P>::BindHostComputeTextureBuffers() {
+    ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
+        const TextureBufferBinding& binding = compute_texture_buffers[index];
+        Buffer& buffer = slot_buffers[binding.buffer_id];
+        const u32 size = binding.size;
+        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+
+        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const PixelFormat format = binding.format;
+        if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
+            if (((image_compute_texture_buffers >> index) & 1) != 0) {
+                runtime.BindImageBuffer(buffer, offset, size, format);
+            } else {
+                runtime.BindTextureBuffer(buffer, offset, size, format);
+            }
+        } else {
+            runtime.BindTextureBuffer(buffer, offset, size, format);
+        }
+    });
+}
+
+template <class P>
 void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
     if (is_indexed) {
         UpdateIndexBuffer();
@@ -1056,6 +1216,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
     for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
         UpdateUniformBuffers(stage);
         UpdateStorageBuffers(stage);
+        UpdateTextureBuffers(stage);
     }
 }
 
@@ -1063,6 +1224,7 @@ template <class P>
 void BufferCache<P>::DoUpdateComputeBuffers() {
     UpdateComputeUniformBuffers();
     UpdateComputeStorageBuffers();
+    UpdateComputeTextureBuffers();
 }
 
 template <class P>
@@ -1132,7 +1294,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
 
 template <class P>
 void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
-    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
         Binding& binding = uniform_buffers[stage][index];
         if (binding.buffer_id) {
             // Already updated
@@ -1163,6 +1325,18 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
 }
 
 template <class P>
+void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
+    ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
+        Binding& binding = texture_buffers[stage][index];
+        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+        // Mark buffer as written if needed
+        if (((written_texture_buffers[stage] >> index) & 1) != 0) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
+        }
+    });
+}
+
+template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffers() {
     if (maxwell3d.regs.tfb_enabled == 0) {
         return;
@@ -1193,7 +1367,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
 
 template <class P>
 void BufferCache<P>::UpdateComputeUniformBuffers() {
-    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         Binding& binding = compute_uniform_buffers[index];
         binding = NULL_BINDING;
         const auto& launch_desc = kepler_compute.launch_description;
@@ -1214,11 +1388,22 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
     ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
         // Resolve buffer
         Binding& binding = compute_storage_buffers[index];
-        const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
-        binding.buffer_id = buffer_id;
+        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
         // Mark as written if needed
         if (((written_compute_storage_buffers >> index) & 1) != 0) {
-            MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
+        }
+    });
+}
+
+template <class P>
+void BufferCache<P>::UpdateComputeTextureBuffers() {
+    ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
+        Binding& binding = compute_texture_buffers[index];
+        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+        // Mark as written if needed
+        if (((written_compute_texture_buffers >> index) & 1) != 0) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
        }
    });
 }
@@ -1551,6 +1736,7 @@ template <class P>
 void BufferCache<P>::NotifyBufferDeletion() {
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
         dirty_uniform_buffers.fill(~u32{0});
+        uniform_buffer_binding_sizes.fill({});
     }
     auto& flags = maxwell3d.dirty.flags;
     flags[Dirty::IndexBuffer] = true;
@@ -1578,6 +1764,25 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
 }
 
 template <class P>
+typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
+    GPUVAddr gpu_addr, u32 size, PixelFormat format) {
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    TextureBufferBinding binding;
+    if (!cpu_addr || size == 0) {
+        binding.cpu_addr = 0;
+        binding.size = 0;
+        binding.buffer_id = NULL_BUFFER_ID;
+        binding.format = PixelFormat::Invalid;
+    } else {
+        binding.cpu_addr = *cpu_addr;
+        binding.size = size;
+        binding.buffer_id = BufferId{};
+        binding.format = format;
+    }
+    return binding;
+}
+
+template <class P>
 std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
     u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
     if (IsRangeGranular(cpu_addr, size) ||
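
Note: the texel-buffer entry points introduced above are driven per stage by the renderer before each draw. A minimal caller sketch follows, assuming yuzu's common_types.h typedefs (GPUVAddr, u32); TexelBufferDesc and BindTexelBuffersForStage are illustrative names rather than part of this change, while the BufferCache methods are the ones added in the diff.

#include <cstddef>
#include <span>

// Illustrative descriptor; in the real renderer this data would come from the shader's metadata.
struct TexelBufferDesc {
    GPUVAddr gpu_addr;                       // guest GPU address of the texel buffer
    u32 size;                                // size in bytes
    VideoCore::Surface::PixelFormat format;  // view format
    bool is_written;                         // shader writes through this binding
    bool is_image;                           // bind as image buffer instead of texture buffer
};

template <class BufferCacheType>
void BindTexelBuffersForStage(BufferCacheType& buffer_cache, size_t stage,
                              std::span<const TexelBufferDesc> descs) {
    buffer_cache.UnbindGraphicsTextureBuffers(stage);
    for (size_t index = 0; index < descs.size(); ++index) {
        const TexelBufferDesc& desc = descs[index];
        buffer_cache.BindGraphicsTextureBuffer(stage, index, desc.gpu_addr, desc.size, desc.format,
                                               desc.is_written, desc.is_image);
    }
    // A later DoUpdateGraphicsBuffers()/BindHostStageBuffers(stage) pass resolves these bindings
    // and dispatches to Runtime::BindTextureBuffer or Runtime::BindImageBuffer, as shown above.
}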
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index 7149af290..b1be065c3 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
         FillBlock(table, OFF(zeta), NUM(zeta), flag);
     }
 }
+
+void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) {
+    FillBlock(tables[0], OFF(shader_config[0]),
+              NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders);
+}
 } // Anonymous namespace
 
 void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
@@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
     SetupIndexBuffer(tables);
     SetupDirtyDescriptors(tables);
     SetupDirtyRenderTargets(tables);
+    SetupDirtyShaders(tables);
 }
 
 } // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 702688ace..504465d3f 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -36,6 +36,8 @@ enum : u8 {
 
     IndexBuffer,
 
+    Shaders,
+
     LastCommonEntry,
 };
 
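
For reference, consumers poll this flag through the same maxwell3d.dirty.flags bitset used elsewhere in this change; a minimal hedged sketch (the helper name is illustrative, only the flag itself is new):

// Illustrative helper: test-and-clear the new Shaders dirty flag before rebuilding pipeline keys.
bool ConsumeShadersDirtyFlag(Tegra::Engines::Maxwell3D& maxwell3d) {
    auto& flags = maxwell3d.dirty.flags;
    const bool shaders_dirty = flags[VideoCommon::Dirty::Shaders];
    flags[VideoCommon::Dirty::Shaders] = false;  // cleared once the caller re-reads shader_config
    return shaders_dirty;
}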
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
deleted file mode 100644
index f46e81bb7..000000000
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <type_traits>
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-#include "video_core/textures/texture.h"
-
-namespace Tegra::Engines {
-
-struct SamplerDescriptor {
-    union {
-        u32 raw = 0;
-        BitField<0, 2, Tegra::Shader::TextureType> texture_type;
-        BitField<2, 3, Tegra::Texture::ComponentType> r_type;
-        BitField<5, 1, u32> is_array;
-        BitField<6, 1, u32> is_buffer;
-        BitField<7, 1, u32> is_shadow;
-        BitField<8, 3, Tegra::Texture::ComponentType> g_type;
-        BitField<11, 3, Tegra::Texture::ComponentType> b_type;
-        BitField<14, 3, Tegra::Texture::ComponentType> a_type;
-        BitField<17, 7, Tegra::Texture::TextureFormat> format;
-    };
-
-    bool operator==(const SamplerDescriptor& rhs) const noexcept {
-        return raw == rhs.raw;
-    }
-
-    bool operator!=(const SamplerDescriptor& rhs) const noexcept {
-        return !operator==(rhs);
-    }
-
-    static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
-        using Tegra::Shader::TextureType;
-        SamplerDescriptor result;
-
-        result.format.Assign(tic.format.Value());
-        result.r_type.Assign(tic.r_type.Value());
-        result.g_type.Assign(tic.g_type.Value());
-        result.b_type.Assign(tic.b_type.Value());
-        result.a_type.Assign(tic.a_type.Value());
-
-        switch (tic.texture_type.Value()) {
-        case Tegra::Texture::TextureType::Texture1D:
-            result.texture_type.Assign(TextureType::Texture1D);
-            return result;
-        case Tegra::Texture::TextureType::Texture2D:
-            result.texture_type.Assign(TextureType::Texture2D);
-            return result;
-        case Tegra::Texture::TextureType::Texture3D:
-            result.texture_type.Assign(TextureType::Texture3D);
-            return result;
-        case Tegra::Texture::TextureType::TextureCubemap:
-            result.texture_type.Assign(TextureType::TextureCube);
-            return result;
-        case Tegra::Texture::TextureType::Texture1DArray:
-            result.texture_type.Assign(TextureType::Texture1D);
-            result.is_array.Assign(1);
-            return result;
-        case Tegra::Texture::TextureType::Texture2DArray:
-            result.texture_type.Assign(TextureType::Texture2D);
-            result.is_array.Assign(1);
-            return result;
-        case Tegra::Texture::TextureType::Texture1DBuffer:
-            result.texture_type.Assign(TextureType::Texture1D);
-            result.is_buffer.Assign(1);
-            return result;
-        case Tegra::Texture::TextureType::Texture2DNoMipmap:
-            result.texture_type.Assign(TextureType::Texture2D);
-            return result;
-        case Tegra::Texture::TextureType::TextureCubeArray:
-            result.texture_type.Assign(TextureType::TextureCube);
-            result.is_array.Assign(1);
-            return result;
-        default:
-            result.texture_type.Assign(TextureType::Texture2D);
-            return result;
-        }
-    }
-};
-static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
-
-class ConstBufferEngineInterface {
-public:
-    virtual ~ConstBufferEngineInterface() = default;
-    virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
-    virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
-    virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                                    u64 offset) const = 0;
-    virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
-    virtual u32 GetBoundBuffer() const = 0;
-
-    virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
-    virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
-};
-
-} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a9b75091e..492b4c5a3 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -8,7 +8,6 @@
8#include "core/core.h" 8#include "core/core.h"
9#include "video_core/engines/kepler_compute.h" 9#include "video_core/engines/kepler_compute.h"
10#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/shader_type.h"
12#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
13#include "video_core/rasterizer_interface.h" 12#include "video_core/rasterizer_interface.h"
14#include "video_core/renderer_base.h" 13#include "video_core/renderer_base.h"
@@ -57,53 +56,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
57 } 56 }
58} 57}
59 58
60u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
61 ASSERT(stage == ShaderType::Compute);
62 const auto& buffer = launch_description.const_buffer_config[const_buffer];
63 u32 result;
64 std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
65 return result;
66}
67
68SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
69 return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
70}
71
72SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
73 u64 offset) const {
74 ASSERT(stage == ShaderType::Compute);
75 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
76 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
77 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
78}
79
80SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
81 const Texture::TextureHandle tex_handle{handle};
82 const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
83 const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
84
85 SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
86 result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
87 return result;
88}
89
90VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
91 return rasterizer->AccessGuestDriverProfile();
92}
93
94const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
95 return rasterizer->AccessGuestDriverProfile();
96}
97
98void KeplerCompute::ProcessLaunch() { 59void KeplerCompute::ProcessLaunch() {
99 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); 60 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
100 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, 61 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
101 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); 62 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
102 63 rasterizer->DispatchCompute();
103 const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
104 LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
105
106 rasterizer->DispatchCompute(code_addr);
107} 64}
108 65
109Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { 66Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
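The removed block above also computed the compute shader's entry address before dispatching; after this change DispatchCompute takes no address, and the rasterizer is expected to derive it from the launch descriptor itself. A minimal sketch of the computation the removed code performed (not part of the new API):

// Sketch only: shader code window base plus the per-dispatch program offset
// read from the launch descriptor.
const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;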
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7c40cba38..f8b8d06ac 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,10 +10,8 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/const_buffer_engine_interface.h"
14#include "video_core/engines/engine_interface.h" 13#include "video_core/engines/engine_interface.h"
15#include "video_core/engines/engine_upload.h" 14#include "video_core/engines/engine_upload.h"
16#include "video_core/engines/shader_type.h"
17#include "video_core/gpu.h" 15#include "video_core/gpu.h"
18#include "video_core/textures/texture.h" 16#include "video_core/textures/texture.h"
19 17
@@ -40,7 +38,7 @@ namespace Tegra::Engines {
40#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 38#define KEPLER_COMPUTE_REG_INDEX(field_name) \
41 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 39 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
42 40
43class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { 41class KeplerCompute final : public EngineInterface {
44public: 42public:
45 explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); 43 explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
46 ~KeplerCompute(); 44 ~KeplerCompute();
@@ -209,23 +207,6 @@ public:
209 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, 207 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
210 u32 methods_pending) override; 208 u32 methods_pending) override;
211 209
212 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
213
214 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
215
216 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
217 u64 offset) const override;
218
219 SamplerDescriptor AccessSampler(u32 handle) const override;
220
221 u32 GetBoundBuffer() const override {
222 return regs.tex_cb_index;
223 }
224
225 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
226
227 const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
228
229private: 210private:
230 void ProcessLaunch(); 211 void ProcessLaunch();
231 212
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index aab6b8f7a..b18b8a02a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -8,7 +8,6 @@
8#include "core/core.h" 8#include "core/core.h"
9#include "core/core_timing.h" 9#include "core/core_timing.h"
10#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/shader_type.h"
12#include "video_core/gpu.h" 11#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
@@ -670,42 +669,4 @@ void Maxwell3D::ProcessClearBuffers() {
670 rasterizer->Clear(); 669 rasterizer->Clear();
671} 670}
672 671
673u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
674 ASSERT(stage != ShaderType::Compute);
675 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
676 const auto& buffer = shader_stage.const_buffers[const_buffer];
677 return memory_manager.Read<u32>(buffer.address + offset);
678}
679
680SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
681 return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
682}
683
684SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
685 u64 offset) const {
686 ASSERT(stage != ShaderType::Compute);
687 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
688 const auto& tex_info_buffer = shader.const_buffers[const_buffer];
689 const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
690 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
691}
692
693SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
694 const Texture::TextureHandle tex_handle{handle};
695 const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
696 const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
697
698 SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
699 result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
700 return result;
701}
702
703VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
704 return rasterizer->AccessGuestDriverProfile();
705}
706
707const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
708 return rasterizer->AccessGuestDriverProfile();
709}
710
711} // namespace Tegra::Engines 672} // namespace Tegra::Engines
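The removed Maxwell3D accessors mirror the Kepler compute ones above: a texture handle splits into a TIC index (texture format and type, which feeds SamplerDescriptor::FromTIC) and a TSC index (sampler state, which supplies the depth-compare flag). Restating the removed AccessSampler body with comments, under the assumption that TextureHandle packs the TIC index in its low bits and the TSC index above it (the exact widths live in video_core/textures/texture.h):

// Sketch only; this is the removed lookup flow with explanatory comments.
const Texture::TextureHandle tex_handle{handle};
const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); // image: format, type, array/buffer
const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); // sampler: filtering, compare mode
SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);   // texture_type/is_array/is_buffer bits
result.is_shadow.Assign(tsc.depth_compare_enabled.Value());   // shadow sampling comes from the TSC
return result;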
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 335383955..1aa43523a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -17,11 +17,9 @@
17#include "common/common_funcs.h" 17#include "common/common_funcs.h"
18#include "common/common_types.h" 18#include "common/common_types.h"
19#include "common/math_util.h" 19#include "common/math_util.h"
20#include "video_core/engines/const_buffer_engine_interface.h"
21#include "video_core/engines/const_buffer_info.h" 20#include "video_core/engines/const_buffer_info.h"
22#include "video_core/engines/engine_interface.h" 21#include "video_core/engines/engine_interface.h"
23#include "video_core/engines/engine_upload.h" 22#include "video_core/engines/engine_upload.h"
24#include "video_core/engines/shader_type.h"
25#include "video_core/gpu.h" 23#include "video_core/gpu.h"
26#include "video_core/macro/macro.h" 24#include "video_core/macro/macro.h"
27#include "video_core/textures/texture.h" 25#include "video_core/textures/texture.h"
@@ -49,7 +47,7 @@ namespace Tegra::Engines {
49#define MAXWELL3D_REG_INDEX(field_name) \ 47#define MAXWELL3D_REG_INDEX(field_name) \
50 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) 48 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
51 49
52class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { 50class Maxwell3D final : public EngineInterface {
53public: 51public:
54 explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); 52 explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
55 ~Maxwell3D(); 53 ~Maxwell3D();
@@ -307,10 +305,6 @@ public:
307 return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); 305 return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
308 } 306 }
309 307
310 bool IsConstant() const {
311 return constant;
312 }
313
314 bool IsValid() const { 308 bool IsValid() const {
315 return size != Size::Invalid; 309 return size != Size::Invalid;
316 } 310 }
@@ -912,7 +906,11 @@ public:
912 906
913 u32 fill_rectangle; 907 u32 fill_rectangle;
914 908
915 INSERT_PADDING_WORDS_NOINIT(0x8); 909 INSERT_PADDING_WORDS_NOINIT(0x2);
910
911 u32 conservative_raster_enable;
912
913 INSERT_PADDING_WORDS_NOINIT(0x5);
916 914
917 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; 915 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
918 916
@@ -959,7 +957,11 @@ public:
959 957
960 SamplerIndex sampler_index; 958 SamplerIndex sampler_index;
961 959
962 INSERT_PADDING_WORDS_NOINIT(0x25); 960 INSERT_PADDING_WORDS_NOINIT(0x2);
961
962 std::array<u32, 8> gp_passthrough_mask;
963
964 INSERT_PADDING_WORDS_NOINIT(0x1B);
963 965
964 u32 depth_test_enable; 966 u32 depth_test_enable;
965 967
@@ -1152,7 +1154,11 @@ public:
1152 u32 index; 1154 u32 index;
1153 } primitive_restart; 1155 } primitive_restart;
1154 1156
1155 INSERT_PADDING_WORDS_NOINIT(0x5F); 1157 INSERT_PADDING_WORDS_NOINIT(0xE);
1158
1159 u32 provoking_vertex_last;
1160
1161 INSERT_PADDING_WORDS_NOINIT(0x50);
1156 1162
1157 struct { 1163 struct {
1158 u32 start_addr_high; 1164 u32 start_addr_high;
@@ -1424,23 +1430,6 @@ public:
1424 1430
1425 void FlushMMEInlineDraw(); 1431 void FlushMMEInlineDraw();
1426 1432
1427 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
1428
1429 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
1430
1431 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
1432 u64 offset) const override;
1433
1434 SamplerDescriptor AccessSampler(u32 handle) const override;
1435
1436 u32 GetBoundBuffer() const override {
1437 return regs.tex_cb_index;
1438 }
1439
1440 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
1441
1442 const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
1443
1444 bool ShouldExecute() const { 1433 bool ShouldExecute() const {
1445 return execute_on; 1434 return execute_on;
1446 } 1435 }
@@ -1630,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8);
1630ASSERT_REG_POSITION(render_area, 0x3FD); 1619ASSERT_REG_POSITION(render_area, 0x3FD);
1631ASSERT_REG_POSITION(clear_flags, 0x43E); 1620ASSERT_REG_POSITION(clear_flags, 0x43E);
1632ASSERT_REG_POSITION(fill_rectangle, 0x44F); 1621ASSERT_REG_POSITION(fill_rectangle, 0x44F);
1622ASSERT_REG_POSITION(conservative_raster_enable, 0x452);
1633ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1623ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
1634ASSERT_REG_POSITION(multisample_sample_locations, 0x478); 1624ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
1635ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); 1625ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
@@ -1638,6 +1628,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a);
1638ASSERT_REG_POSITION(zeta_height, 0x48b); 1628ASSERT_REG_POSITION(zeta_height, 0x48b);
1639ASSERT_REG_POSITION(zeta_depth, 0x48c); 1629ASSERT_REG_POSITION(zeta_depth, 0x48c);
1640ASSERT_REG_POSITION(sampler_index, 0x48D); 1630ASSERT_REG_POSITION(sampler_index, 0x48D);
1631ASSERT_REG_POSITION(gp_passthrough_mask, 0x490);
1641ASSERT_REG_POSITION(depth_test_enable, 0x4B3); 1632ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
1642ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); 1633ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
1643ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); 1634ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1690,6 +1681,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581);
1690ASSERT_REG_POSITION(code_address, 0x582); 1681ASSERT_REG_POSITION(code_address, 0x582);
1691ASSERT_REG_POSITION(draw, 0x585); 1682ASSERT_REG_POSITION(draw, 0x585);
1692ASSERT_REG_POSITION(primitive_restart, 0x591); 1683ASSERT_REG_POSITION(primitive_restart, 0x591);
1684ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
1693ASSERT_REG_POSITION(index_array, 0x5F2); 1685ASSERT_REG_POSITION(index_array, 0x5F2);
1694ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); 1686ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
1695ASSERT_REG_POSITION(instanced_arrays, 0x620); 1687ASSERT_REG_POSITION(instanced_arrays, 0x620);
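The three new Maxwell3D registers are carved out of existing padding, so every later offset is unchanged: 0x8 padding words become 0x2 + conservative_raster_enable + 0x5, 0x25 becomes 0x2 + gp_passthrough_mask[8] + 0x1B, and 0x5F becomes 0xE + provoking_vertex_last + 0x50. The new ASSERT_REG_POSITION lines pin the resulting word offsets; the same checks can be written with the MAXWELL3D_REG_INDEX macro shown earlier in this header (sketch only):

// Sketch: equivalent offset checks, expressed as word indices into Maxwell3D::Regs.
static_assert(MAXWELL3D_REG_INDEX(conservative_raster_enable) == 0x452);
static_assert(MAXWELL3D_REG_INDEX(gp_passthrough_mask) == 0x490);
static_assert(MAXWELL3D_REG_INDEX(provoking_vertex_last) == 0x5A1);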
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index c51776466..c7ec1eac9 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -127,7 +127,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
127 127
128 // Optimized path for micro copies. 128 // Optimized path for micro copies.
129 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 129 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
130 if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { 130 if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
131 regs.src_params.height > GOB_SIZE_Y) {
131 FastCopyBlockLinearToPitch(); 132 FastCopyBlockLinearToPitch();
132 return; 133 return;
133 } 134 }
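The micro-copy fast path now additionally requires the block-linear source to be taller than one GOB. Spelling the guard out (a GOB here being the 64-byte-wide, 8-row, 512-byte tile of the block-linear layout, per the GOB_SIZE_X/GOB_SIZE_Y/GOB_SIZE constants):

// Sketch of the updated guard around FastCopyBlockLinearToPitch().
const bool is_micro_copy = dst_size < GOB_SIZE &&                  // destination fits in one GOB
                           regs.pitch_out <= GOB_SIZE_X &&         // each output line fits one GOB row
                           regs.src_params.height > GOB_SIZE_Y;    // new: source spans multiple GOB rows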
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
deleted file mode 100644
index 8b45f1b62..000000000
--- a/src/video_core/engines/shader_bytecode.h
+++ /dev/null
@@ -1,2298 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <bitset>
9#include <optional>
10#include <tuple>
11#include <vector>
12
13#include "common/assert.h"
14#include "common/bit_field.h"
15#include "common/common_types.h"
16
17namespace Tegra::Shader {
18
19struct Register {
20 /// Number of registers
21 static constexpr std::size_t NumRegisters = 256;
22
23 /// Register 255 is special cased to always be 0
24 static constexpr std::size_t ZeroIndex = 255;
25
26 enum class Size : u64 {
27 Byte = 0,
28 Short = 1,
29 Word = 2,
30 Long = 3,
31 };
32
33 constexpr Register() = default;
34
35 constexpr Register(u64 value_) : value(value_) {}
36
37 [[nodiscard]] constexpr operator u64() const {
38 return value;
39 }
40
41 template <typename T>
42 [[nodiscard]] constexpr u64 operator-(const T& oth) const {
43 return value - oth;
44 }
45
46 template <typename T>
47 [[nodiscard]] constexpr u64 operator&(const T& oth) const {
48 return value & oth;
49 }
50
51 [[nodiscard]] constexpr u64 operator&(const Register& oth) const {
52 return value & oth.value;
53 }
54
55 [[nodiscard]] constexpr u64 operator~() const {
56 return ~value;
57 }
58
59 [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
60 elem = (value + elem) & 3;
61 return (value & ~3) + elem;
62 }
63
64private:
65 u64 value{};
66};
67
68enum class AttributeSize : u64 {
69 Word = 0,
70 DoubleWord = 1,
71 TripleWord = 2,
72 QuadWord = 3,
73};
74
75union Attribute {
76 Attribute() = default;
77
78 constexpr explicit Attribute(u64 value_) : value(value_) {}
79
80 enum class Index : u64 {
81 LayerViewportPointSize = 6,
82 Position = 7,
83 Attribute_0 = 8,
84 Attribute_31 = 39,
85 FrontColor = 40,
86 FrontSecondaryColor = 41,
87 BackColor = 42,
88 BackSecondaryColor = 43,
89 ClipDistances0123 = 44,
90 ClipDistances4567 = 45,
91 PointCoord = 46,
92 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
93 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
94 // shader.
95 TessCoordInstanceIDVertexID = 47,
96 TexCoord_0 = 48,
97 TexCoord_7 = 55,
98 // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
99 // shader. It is unknown what the other values contain.
100 FrontFacing = 63,
101 };
102
103 union {
104 BitField<20, 10, u64> immediate;
105 BitField<22, 2, u64> element;
106 BitField<24, 6, Index> index;
107 BitField<31, 1, u64> patch;
108 BitField<47, 3, AttributeSize> size;
109
110 [[nodiscard]] bool IsPhysical() const {
111 return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
112 }
113 } fmt20;
114
115 union {
116 BitField<30, 2, u64> element;
117 BitField<32, 6, Index> index;
118 } fmt28;
119
120 BitField<39, 8, u64> reg;
121 u64 value{};
122};
123
124union Sampler {
125 Sampler() = default;
126
127 constexpr explicit Sampler(u64 value_) : value(value_) {}
128
129 enum class Index : u64 {
130 Sampler_0 = 8,
131 };
132
133 BitField<36, 13, Index> index;
134 u64 value{};
135};
136
137union Image {
138 Image() = default;
139
140 constexpr explicit Image(u64 value_) : value{value_} {}
141
142 BitField<36, 13, u64> index;
143 u64 value;
144};
145
146} // namespace Tegra::Shader
147
148namespace std {
149
150// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
151template <>
152struct make_unsigned<Tegra::Shader::Attribute> {
153 using type = Tegra::Shader::Attribute;
154};
155
156template <>
157struct make_unsigned<Tegra::Shader::Register> {
158 using type = Tegra::Shader::Register;
159};
160
161} // namespace std
162
163namespace Tegra::Shader {
164
165enum class Pred : u64 {
166 UnusedIndex = 0x7,
167 NeverExecute = 0xF,
168};
169
170enum class PredCondition : u64 {
171 F = 0, // Always false
172 LT = 1, // Ordered less than
173 EQ = 2, // Ordered equal
174 LE = 3, // Ordered less than or equal
175 GT = 4, // Ordered greater than
176 NE = 5, // Ordered not equal
177 GE = 6, // Ordered greater than or equal
178 NUM = 7, // Ordered
179 NAN_ = 8, // Unordered
180 LTU = 9, // Unordered less than
181 EQU = 10, // Unordered equal
182 LEU = 11, // Unordered less than or equal
183 GTU = 12, // Unordered greater than
184 NEU = 13, // Unordered not equal
185 GEU = 14, // Unordered greater than or equal
186 T = 15, // Always true
187};
188
189enum class PredOperation : u64 {
190 And = 0,
191 Or = 1,
192 Xor = 2,
193};
194
195enum class LogicOperation : u64 {
196 And = 0,
197 Or = 1,
198 Xor = 2,
199 PassB = 3,
200};
201
202enum class SubOp : u64 {
203 Cos = 0x0,
204 Sin = 0x1,
205 Ex2 = 0x2,
206 Lg2 = 0x3,
207 Rcp = 0x4,
208 Rsq = 0x5,
209 Sqrt = 0x8,
210};
211
212enum class F2iRoundingOp : u64 {
213 RoundEven = 0,
214 Floor = 1,
215 Ceil = 2,
216 Trunc = 3,
217};
218
219enum class F2fRoundingOp : u64 {
220 None = 0,
221 Pass = 3,
222 Round = 8,
223 Floor = 9,
224 Ceil = 10,
225 Trunc = 11,
226};
227
228enum class AtomicOp : u64 {
229 Add = 0,
230 Min = 1,
231 Max = 2,
232 Inc = 3,
233 Dec = 4,
234 And = 5,
235 Or = 6,
236 Xor = 7,
237 Exch = 8,
238 SafeAdd = 10,
239};
240
241enum class GlobalAtomicType : u64 {
242 U32 = 0,
243 S32 = 1,
244 U64 = 2,
245 F32_FTZ_RN = 3,
246 F16x2_FTZ_RN = 4,
247 S64 = 5,
248};
249
250enum class UniformType : u64 {
251 UnsignedByte = 0,
252 SignedByte = 1,
253 UnsignedShort = 2,
254 SignedShort = 3,
255 Single = 4,
256 Double = 5,
257 Quad = 6,
258 UnsignedQuad = 7,
259};
260
261enum class StoreType : u64 {
262 Unsigned8 = 0,
263 Signed8 = 1,
264 Unsigned16 = 2,
265 Signed16 = 3,
266 Bits32 = 4,
267 Bits64 = 5,
268 Bits128 = 6,
269};
270
271enum class AtomicType : u64 {
272 U32 = 0,
273 S32 = 1,
274 U64 = 2,
275 S64 = 3,
276};
277
278enum class IMinMaxExchange : u64 {
279 None = 0,
280 XLo = 1,
281 XMed = 2,
282 XHi = 3,
283};
284
285enum class VideoType : u64 {
286 Size16_Low = 0,
287 Size16_High = 1,
288 Size32 = 2,
289 Invalid = 3,
290};
291
292enum class VmadShr : u64 {
293 Shr7 = 1,
294 Shr15 = 2,
295};
296
297enum class VmnmxType : u64 {
298 Bits8,
299 Bits16,
300 Bits32,
301};
302
303enum class VmnmxOperation : u64 {
304 Mrg_16H = 0,
305 Mrg_16L = 1,
306 Mrg_8B0 = 2,
307 Mrg_8B2 = 3,
308 Acc = 4,
309 Min = 5,
310 Max = 6,
311 Nop = 7,
312};
313
314enum class XmadMode : u64 {
315 None = 0,
316 CLo = 1,
317 CHi = 2,
318 CSfu = 3,
319 CBcc = 4,
320};
321
322enum class IAdd3Mode : u64 {
323 None = 0,
324 RightShift = 1,
325 LeftShift = 2,
326};
327
328enum class IAdd3Height : u64 {
329 None = 0,
330 LowerHalfWord = 1,
331 UpperHalfWord = 2,
332};
333
334enum class FlowCondition : u64 {
335 Always = 0xF,
336 Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
337};
338
339enum class ConditionCode : u64 {
340 F = 0,
341 LT = 1,
342 EQ = 2,
343 LE = 3,
344 GT = 4,
345 NE = 5,
346 GE = 6,
347 Num = 7,
348 Nan = 8,
349 LTU = 9,
350 EQU = 10,
351 LEU = 11,
352 GTU = 12,
353 NEU = 13,
354 GEU = 14,
355 T = 15,
356 OFF = 16,
357 LO = 17,
358 SFF = 18,
359 LS = 19,
360 HI = 20,
361 SFT = 21,
362 HS = 22,
363 OFT = 23,
364 CSM_TA = 24,
365 CSM_TR = 25,
366 CSM_MX = 26,
367 FCSM_TA = 27,
368 FCSM_TR = 28,
369 FCSM_MX = 29,
370 RLE = 30,
371 RGT = 31,
372};
373
374enum class PredicateResultMode : u64 {
375 None = 0x0,
376 NotZero = 0x3,
377};
378
379enum class TextureType : u64 {
380 Texture1D = 0,
381 Texture2D = 1,
382 Texture3D = 2,
383 TextureCube = 3,
384};
385
386enum class TextureQueryType : u64 {
387 Dimension = 1,
388 TextureType = 2,
389 SamplePosition = 5,
390 Filter = 16,
391 LevelOfDetail = 18,
392 Wrap = 20,
393 BorderColor = 22,
394};
395
396enum class TextureProcessMode : u64 {
397 None = 0,
398 LZ = 1, // Load LOD of zero.
399 LB = 2, // Load Bias.
400 LL = 3, // Load LOD.
401 LBA = 6, // Load Bias. The meaning of the 'A' suffix is unknown; it does not appear to differ from LB.
402 LLA = 7 // Load LOD. The meaning of the 'A' suffix is unknown; it does not appear to differ from LL.
403};
404
405enum class TextureMiscMode : u64 {
406 DC,
407 AOFFI, // Uses Offset
408 NDV,
409 NODEP,
410 MZ,
411 PTP,
412};
413
414enum class SurfaceDataMode : u64 {
415 P = 0,
416 D_BA = 1,
417};
418
419enum class OutOfBoundsStore : u64 {
420 Ignore = 0,
421 Clamp = 1,
422 Trap = 2,
423};
424
425enum class ImageType : u64 {
426 Texture1D = 0,
427 TextureBuffer = 1,
428 Texture1DArray = 2,
429 Texture2D = 3,
430 Texture2DArray = 4,
431 Texture3D = 5,
432};
433
434enum class IsberdMode : u64 {
435 None = 0,
436 Patch = 1,
437 Prim = 2,
438 Attr = 3,
439};
440
441enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
442
443enum class MembarType : u64 {
444 CTA = 0,
445 GL = 1,
446 SYS = 2,
447 VC = 3,
448};
449
450enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
451
452enum class HalfType : u64 {
453 H0_H1 = 0,
454 F32 = 1,
455 H0_H0 = 2,
456 H1_H1 = 3,
457};
458
459enum class HalfMerge : u64 {
460 H0_H1 = 0,
461 F32 = 1,
462 Mrg_H0 = 2,
463 Mrg_H1 = 3,
464};
465
466enum class HalfPrecision : u64 {
467 None = 0,
468 FTZ = 1,
469 FMZ = 2,
470};
471
472enum class R2pMode : u64 {
473 Pr = 0,
474 Cc = 1,
475};
476
477enum class IpaInterpMode : u64 {
478 Pass = 0,
479 Multiply = 1,
480 Constant = 2,
481 Sc = 3,
482};
483
484enum class IpaSampleMode : u64 {
485 Default = 0,
486 Centroid = 1,
487 Offset = 2,
488};
489
490enum class LmemLoadCacheManagement : u64 {
491 Default = 0,
492 LU = 1,
493 CI = 2,
494 CV = 3,
495};
496
497enum class StoreCacheManagement : u64 {
498 Default = 0,
499 CG = 1,
500 CS = 2,
501 WT = 3,
502};
503
504struct IpaMode {
505 IpaInterpMode interpolation_mode;
506 IpaSampleMode sampling_mode;
507
508 [[nodiscard]] bool operator==(const IpaMode& a) const {
509 return std::tie(interpolation_mode, sampling_mode) ==
510 std::tie(a.interpolation_mode, a.sampling_mode);
511 }
512 [[nodiscard]] bool operator!=(const IpaMode& a) const {
513 return !operator==(a);
514 }
515 [[nodiscard]] bool operator<(const IpaMode& a) const {
516 return std::tie(interpolation_mode, sampling_mode) <
517 std::tie(a.interpolation_mode, a.sampling_mode);
518 }
519};
520
521enum class SystemVariable : u64 {
522 LaneId = 0x00,
523 VirtCfg = 0x02,
524 VirtId = 0x03,
525 Pm0 = 0x04,
526 Pm1 = 0x05,
527 Pm2 = 0x06,
528 Pm3 = 0x07,
529 Pm4 = 0x08,
530 Pm5 = 0x09,
531 Pm6 = 0x0a,
532 Pm7 = 0x0b,
533 OrderingTicket = 0x0f,
534 PrimType = 0x10,
535 InvocationId = 0x11,
536 Ydirection = 0x12,
537 ThreadKill = 0x13,
538 ShaderType = 0x14,
539 DirectBeWriteAddressLow = 0x15,
540 DirectBeWriteAddressHigh = 0x16,
541 DirectBeWriteEnabled = 0x17,
542 MachineId0 = 0x18,
543 MachineId1 = 0x19,
544 MachineId2 = 0x1a,
545 MachineId3 = 0x1b,
546 Affinity = 0x1c,
547 InvocationInfo = 0x1d,
548 WscaleFactorXY = 0x1e,
549 WscaleFactorZ = 0x1f,
550 Tid = 0x20,
551 TidX = 0x21,
552 TidY = 0x22,
553 TidZ = 0x23,
554 CtaParam = 0x24,
555 CtaIdX = 0x25,
556 CtaIdY = 0x26,
557 CtaIdZ = 0x27,
558 NtId = 0x28,
559 CirQueueIncrMinusOne = 0x29,
560 Nlatc = 0x2a,
561 SmSpaVersion = 0x2c,
562 MultiPassShaderInfo = 0x2d,
563 LwinHi = 0x2e,
564 SwinHi = 0x2f,
565 SwinLo = 0x30,
566 SwinSz = 0x31,
567 SmemSz = 0x32,
568 SmemBanks = 0x33,
569 LwinLo = 0x34,
570 LwinSz = 0x35,
571 LmemLosz = 0x36,
572 LmemHioff = 0x37,
573 EqMask = 0x38,
574 LtMask = 0x39,
575 LeMask = 0x3a,
576 GtMask = 0x3b,
577 GeMask = 0x3c,
578 RegAlloc = 0x3d,
579 CtxAddr = 0x3e, // .fmask = F_SM50
580 BarrierAlloc = 0x3e, // .fmask = F_SM60
581 GlobalErrorStatus = 0x40,
582 WarpErrorStatus = 0x42,
583 WarpErrorStatusClear = 0x43,
584 PmHi0 = 0x48,
585 PmHi1 = 0x49,
586 PmHi2 = 0x4a,
587 PmHi3 = 0x4b,
588 PmHi4 = 0x4c,
589 PmHi5 = 0x4d,
590 PmHi6 = 0x4e,
591 PmHi7 = 0x4f,
592 ClockLo = 0x50,
593 ClockHi = 0x51,
594 GlobalTimerLo = 0x52,
595 GlobalTimerHi = 0x53,
596 HwTaskId = 0x60,
597 CircularQueueEntryIndex = 0x61,
598 CircularQueueEntryAddressLow = 0x62,
599 CircularQueueEntryAddressHigh = 0x63,
600};
601
602enum class PhysicalAttributeDirection : u64 {
603 Input = 0,
604 Output = 1,
605};
606
607enum class VoteOperation : u64 {
608 All = 0, // allThreadsNV
609 Any = 1, // anyThreadNV
610 Eq = 2, // allThreadsEqualNV
611};
612
613enum class ImageAtomicOperationType : u64 {
614 U32 = 0,
615 S32 = 1,
616 U64 = 2,
617 F32 = 3,
618 S64 = 5,
619 SD32 = 6,
620 SD64 = 7,
621};
622
623enum class ImageAtomicOperation : u64 {
624 Add = 0,
625 Min = 1,
626 Max = 2,
627 Inc = 3,
628 Dec = 4,
629 And = 5,
630 Or = 6,
631 Xor = 7,
632 Exch = 8,
633};
634
635enum class ShuffleOperation : u64 {
636 Idx = 0, // shuffleNV
637 Up = 1, // shuffleUpNV
638 Down = 2, // shuffleDownNV
639 Bfly = 3, // shuffleXorNV
640};
641
642enum class ShfType : u64 {
643 Bits32 = 0,
644 U64 = 2,
645 S64 = 3,
646};
647
648enum class ShfXmode : u64 {
649 None = 0,
650 HI = 1,
651 X = 2,
652 XHI = 3,
653};
654
655union Instruction {
656 constexpr Instruction& operator=(const Instruction& instr) {
657 value = instr.value;
658 return *this;
659 }
660
661 constexpr Instruction(u64 value_) : value{value_} {}
662 constexpr Instruction(const Instruction& instr) : value(instr.value) {}
663
664 [[nodiscard]] constexpr bool Bit(u64 offset) const {
665 return ((value >> offset) & 1) != 0;
666 }
667
668 BitField<0, 8, Register> gpr0;
669 BitField<8, 8, Register> gpr8;
670 union {
671 BitField<16, 4, Pred> full_pred;
672 BitField<16, 3, u64> pred_index;
673 } pred;
674 BitField<19, 1, u64> negate_pred;
675 BitField<20, 8, Register> gpr20;
676 BitField<20, 4, SubOp> sub_op;
677 BitField<28, 8, Register> gpr28;
678 BitField<39, 8, Register> gpr39;
679 BitField<48, 16, u64> opcode;
680
681 union {
682 BitField<8, 5, ConditionCode> cc;
683 BitField<13, 1, u64> trigger;
684 } nop;
685
686 union {
687 BitField<48, 2, VoteOperation> operation;
688 BitField<45, 3, u64> dest_pred;
689 BitField<39, 3, u64> value;
690 BitField<42, 1, u64> negate_value;
691 } vote;
692
693 union {
694 BitField<30, 2, ShuffleOperation> operation;
695 BitField<48, 3, u64> pred48;
696 BitField<28, 1, u64> is_index_imm;
697 BitField<29, 1, u64> is_mask_imm;
698 BitField<20, 5, u64> index_imm;
699 BitField<34, 13, u64> mask_imm;
700 } shfl;
701
702 union {
703 BitField<44, 1, u64> ftz;
704 BitField<39, 2, u64> tab5cb8_2;
705 BitField<38, 1, u64> ndv;
706 BitField<47, 1, u64> cc;
707 BitField<28, 8, u64> swizzle;
708 } fswzadd;
709
710 union {
711 BitField<8, 8, Register> gpr;
712 BitField<20, 24, s64> offset;
713 } gmem;
714
715 union {
716 BitField<20, 16, u64> imm20_16;
717 BitField<20, 19, u64> imm20_19;
718 BitField<20, 32, s64> imm20_32;
719 BitField<45, 1, u64> negate_b;
720 BitField<46, 1, u64> abs_a;
721 BitField<48, 1, u64> negate_a;
722 BitField<49, 1, u64> abs_b;
723 BitField<50, 1, u64> saturate_d;
724 BitField<56, 1, u64> negate_imm;
725
726 union {
727 BitField<39, 3, u64> pred;
728 BitField<42, 1, u64> negate_pred;
729 } fmnmx;
730
731 union {
732 BitField<39, 1, u64> invert_a;
733 BitField<40, 1, u64> invert_b;
734 BitField<41, 2, LogicOperation> operation;
735 BitField<44, 2, PredicateResultMode> pred_result_mode;
736 BitField<48, 3, Pred> pred48;
737 } lop;
738
739 union {
740 BitField<53, 2, LogicOperation> operation;
741 BitField<55, 1, u64> invert_a;
742 BitField<56, 1, u64> invert_b;
743 } lop32i;
744
745 union {
746 BitField<28, 8, u64> imm_lut28;
747 BitField<48, 8, u64> imm_lut48;
748
749 [[nodiscard]] u32 GetImmLut28() const {
750 return static_cast<u32>(imm_lut28);
751 }
752
753 [[nodiscard]] u32 GetImmLut48() const {
754 return static_cast<u32>(imm_lut48);
755 }
756 } lop3;
757
758 [[nodiscard]] u16 GetImm20_16() const {
759 return static_cast<u16>(imm20_16);
760 }
761
762 [[nodiscard]] u32 GetImm20_19() const {
763 u32 imm{static_cast<u32>(imm20_19)};
764 imm <<= 12;
765 imm |= negate_imm ? 0x80000000 : 0;
766 return imm;
767 }
768
769 [[nodiscard]] u32 GetImm20_32() const {
770 return static_cast<u32>(imm20_32);
771 }
772
773 [[nodiscard]] s32 GetSignedImm20_20() const {
774 const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
775 // Sign extend the 20-bit value.
776 const auto mask = 1U << (20 - 1);
777 return static_cast<s32>((immediate ^ mask) - mask);
778 }
779 } alu;
780
781 union {
782 BitField<38, 1, u64> idx;
783 BitField<51, 1, u64> saturate;
784 BitField<52, 2, IpaSampleMode> sample_mode;
785 BitField<54, 2, IpaInterpMode> interp_mode;
786 } ipa;
787
788 union {
789 BitField<39, 2, u64> tab5cb8_2;
790 BitField<41, 3, u64> postfactor;
791 BitField<44, 2, u64> tab5c68_0;
792 BitField<48, 1, u64> negate_b;
793 } fmul;
794
795 union {
796 BitField<55, 1, u64> saturate;
797 } fmul32;
798
799 union {
800 BitField<52, 1, u64> generates_cc;
801 } op_32;
802
803 union {
804 BitField<48, 1, u64> is_signed;
805 } shift;
806
807 union {
808 BitField<39, 1, u64> wrap;
809 } shr;
810
811 union {
812 BitField<37, 2, ShfType> type;
813 BitField<48, 2, ShfXmode> xmode;
814 BitField<50, 1, u64> wrap;
815 BitField<20, 6, u64> immediate;
816 } shf;
817
818 union {
819 BitField<39, 5, u64> shift_amount;
820 BitField<48, 1, u64> negate_b;
821 BitField<49, 1, u64> negate_a;
822 } alu_integer;
823
824 union {
825 BitField<43, 1, u64> x;
826 } iadd;
827
828 union {
829 BitField<39, 1, u64> ftz;
830 BitField<32, 1, u64> saturate;
831 BitField<49, 2, HalfMerge> merge;
832
833 BitField<44, 1, u64> abs_a;
834 BitField<47, 2, HalfType> type_a;
835
836 BitField<30, 1, u64> abs_b;
837 BitField<28, 2, HalfType> type_b;
838
839 BitField<35, 2, HalfType> type_c;
840 } alu_half;
841
842 union {
843 BitField<39, 2, HalfPrecision> precision;
844 BitField<39, 1, u64> ftz;
845 BitField<52, 1, u64> saturate;
846 BitField<49, 2, HalfMerge> merge;
847
848 BitField<43, 1, u64> negate_a;
849 BitField<44, 1, u64> abs_a;
850 BitField<47, 2, HalfType> type_a;
851 } alu_half_imm;
852
853 union {
854 BitField<29, 1, u64> first_negate;
855 BitField<20, 9, u64> first;
856
857 BitField<56, 1, u64> second_negate;
858 BitField<30, 9, u64> second;
859
860 [[nodiscard]] u32 PackImmediates() const {
861 // Immediates are half floats shifted.
862 constexpr u32 imm_shift = 6;
863 return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
864 }
865 } half_imm;
866
867 union {
868 union {
869 BitField<37, 2, HalfPrecision> precision;
870 BitField<32, 1, u64> saturate;
871
872 BitField<31, 1, u64> negate_b;
873 BitField<30, 1, u64> negate_c;
874 BitField<35, 2, HalfType> type_c;
875 } rr;
876
877 BitField<57, 2, HalfPrecision> precision;
878 BitField<52, 1, u64> saturate;
879
880 BitField<49, 2, HalfMerge> merge;
881
882 BitField<47, 2, HalfType> type_a;
883
884 BitField<56, 1, u64> negate_b;
885 BitField<28, 2, HalfType> type_b;
886
887 BitField<51, 1, u64> negate_c;
888 BitField<53, 2, HalfType> type_reg39;
889 } hfma2;
890
891 union {
892 BitField<40, 1, u64> invert;
893 } popc;
894
895 union {
896 BitField<41, 1, u64> sh;
897 BitField<40, 1, u64> invert;
898 BitField<48, 1, u64> is_signed;
899 } flo;
900
901 union {
902 BitField<39, 3, u64> pred;
903 BitField<42, 1, u64> neg_pred;
904 } sel;
905
906 union {
907 BitField<39, 3, u64> pred;
908 BitField<42, 1, u64> negate_pred;
909 BitField<43, 2, IMinMaxExchange> exchange;
910 BitField<48, 1, u64> is_signed;
911 } imnmx;
912
913 union {
914 BitField<31, 2, IAdd3Height> height_c;
915 BitField<33, 2, IAdd3Height> height_b;
916 BitField<35, 2, IAdd3Height> height_a;
917 BitField<37, 2, IAdd3Mode> mode;
918 BitField<49, 1, u64> neg_c;
919 BitField<50, 1, u64> neg_b;
920 BitField<51, 1, u64> neg_a;
921 } iadd3;
922
923 union {
924 BitField<54, 1, u64> saturate;
925 BitField<56, 1, u64> negate_a;
926 } iadd32i;
927
928 union {
929 BitField<53, 1, u64> negate_b;
930 BitField<54, 1, u64> abs_a;
931 BitField<56, 1, u64> negate_a;
932 BitField<57, 1, u64> abs_b;
933 } fadd32i;
934
935 union {
936 BitField<40, 1, u64> brev;
937 BitField<47, 1, u64> rd_cc;
938 BitField<48, 1, u64> is_signed;
939 } bfe;
940
941 union {
942 BitField<48, 3, u64> pred48;
943
944 union {
945 BitField<20, 20, u64> entry_a;
946 BitField<39, 5, u64> entry_b;
947 BitField<45, 1, u64> neg;
948 BitField<46, 1, u64> uses_cc;
949 } imm;
950
951 union {
952 BitField<20, 14, u64> cb_index;
953 BitField<34, 5, u64> cb_offset;
954 BitField<56, 1, u64> neg;
955 BitField<57, 1, u64> uses_cc;
956 } hi;
957
958 union {
959 BitField<20, 14, u64> cb_index;
960 BitField<34, 5, u64> cb_offset;
961 BitField<39, 5, u64> entry_a;
962 BitField<45, 1, u64> neg;
963 BitField<46, 1, u64> uses_cc;
964 } rz;
965
966 union {
967 BitField<39, 5, u64> entry_a;
968 BitField<45, 1, u64> neg;
969 BitField<46, 1, u64> uses_cc;
970 } r1;
971
972 union {
973 BitField<28, 8, u64> entry_a;
974 BitField<37, 1, u64> neg;
975 BitField<38, 1, u64> uses_cc;
976 } r2;
977
978 } lea;
979
980 union {
981 BitField<0, 5, FlowCondition> cond;
982 } flow;
983
984 union {
985 BitField<47, 1, u64> cc;
986 BitField<48, 1, u64> negate_b;
987 BitField<49, 1, u64> negate_c;
988 BitField<51, 2, u64> tab5980_1;
989 BitField<53, 2, u64> tab5980_0;
990 } ffma;
991
992 union {
993 BitField<48, 3, UniformType> type;
994 BitField<44, 2, u64> unknown;
995 } ld_c;
996
997 union {
998 BitField<48, 3, StoreType> type;
999 } ldst_sl;
1000
1001 union {
1002 BitField<44, 2, u64> unknown;
1003 } ld_l;
1004
1005 union {
1006 BitField<44, 2, StoreCacheManagement> cache_management;
1007 } st_l;
1008
1009 union {
1010 BitField<48, 3, UniformType> type;
1011 BitField<46, 2, u64> cache_mode;
1012 } ldg;
1013
1014 union {
1015 BitField<48, 3, UniformType> type;
1016 BitField<46, 2, u64> cache_mode;
1017 } stg;
1018
1019 union {
1020 BitField<23, 3, AtomicOp> operation;
1021 BitField<48, 1, u64> extended;
1022 BitField<20, 3, GlobalAtomicType> type;
1023 } red;
1024
1025 union {
1026 BitField<52, 4, AtomicOp> operation;
1027 BitField<49, 3, GlobalAtomicType> type;
1028 BitField<28, 20, s64> offset;
1029 } atom;
1030
1031 union {
1032 BitField<52, 4, AtomicOp> operation;
1033 BitField<28, 2, AtomicType> type;
1034 BitField<30, 22, s64> offset;
1035
1036 [[nodiscard]] s32 GetImmediateOffset() const {
1037 return static_cast<s32>(offset << 2);
1038 }
1039 } atoms;
1040
1041 union {
1042 BitField<32, 1, PhysicalAttributeDirection> direction;
1043 BitField<47, 3, AttributeSize> size;
1044 BitField<20, 11, u64> address;
1045 } al2p;
1046
1047 union {
1048 BitField<53, 3, UniformType> type;
1049 BitField<52, 1, u64> extended;
1050 } generic;
1051
1052 union {
1053 BitField<0, 3, u64> pred0;
1054 BitField<3, 3, u64> pred3;
1055 BitField<6, 1, u64> neg_b;
1056 BitField<7, 1, u64> abs_a;
1057 BitField<39, 3, u64> pred39;
1058 BitField<42, 1, u64> neg_pred;
1059 BitField<43, 1, u64> neg_a;
1060 BitField<44, 1, u64> abs_b;
1061 BitField<45, 2, PredOperation> op;
1062 BitField<47, 1, u64> ftz;
1063 BitField<48, 4, PredCondition> cond;
1064 } fsetp;
1065
1066 union {
1067 BitField<0, 3, u64> pred0;
1068 BitField<3, 3, u64> pred3;
1069 BitField<39, 3, u64> pred39;
1070 BitField<42, 1, u64> neg_pred;
1071 BitField<45, 2, PredOperation> op;
1072 BitField<48, 1, u64> is_signed;
1073 BitField<49, 3, PredCondition> cond;
1074 } isetp;
1075
1076 union {
1077 BitField<48, 1, u64> is_signed;
1078 BitField<49, 3, PredCondition> cond;
1079 } icmp;
1080
1081 union {
1082 BitField<0, 3, u64> pred0;
1083 BitField<3, 3, u64> pred3;
1084 BitField<12, 3, u64> pred12;
1085 BitField<15, 1, u64> neg_pred12;
1086 BitField<24, 2, PredOperation> cond;
1087 BitField<29, 3, u64> pred29;
1088 BitField<32, 1, u64> neg_pred29;
1089 BitField<39, 3, u64> pred39;
1090 BitField<42, 1, u64> neg_pred39;
1091 BitField<45, 2, PredOperation> op;
1092 } psetp;
1093
1094 union {
1095 BitField<43, 4, PredCondition> cond;
1096 BitField<45, 2, PredOperation> op;
1097 BitField<3, 3, u64> pred3;
1098 BitField<0, 3, u64> pred0;
1099 BitField<39, 3, u64> pred39;
1100 } vsetp;
1101
1102 union {
1103 BitField<12, 3, u64> pred12;
1104 BitField<15, 1, u64> neg_pred12;
1105 BitField<24, 2, PredOperation> cond;
1106 BitField<29, 3, u64> pred29;
1107 BitField<32, 1, u64> neg_pred29;
1108 BitField<39, 3, u64> pred39;
1109 BitField<42, 1, u64> neg_pred39;
1110 BitField<44, 1, u64> bf;
1111 BitField<45, 2, PredOperation> op;
1112 } pset;
1113
1114 union {
1115 BitField<0, 3, u64> pred0;
1116 BitField<3, 3, u64> pred3;
1117 BitField<8, 5, ConditionCode> cc; // flag in cc
1118 BitField<39, 3, u64> pred39;
1119 BitField<42, 1, u64> neg_pred39;
1120 BitField<45, 4, PredOperation> op; // op with pred39
1121 } csetp;
1122
1123 union {
1124 BitField<6, 1, u64> ftz;
1125 BitField<45, 2, PredOperation> op;
1126 BitField<3, 3, u64> pred3;
1127 BitField<0, 3, u64> pred0;
1128 BitField<43, 1, u64> negate_a;
1129 BitField<44, 1, u64> abs_a;
1130 BitField<47, 2, HalfType> type_a;
1131 union {
1132 BitField<35, 4, PredCondition> cond;
1133 BitField<49, 1, u64> h_and;
1134 BitField<31, 1, u64> negate_b;
1135 BitField<30, 1, u64> abs_b;
1136 BitField<28, 2, HalfType> type_b;
1137 } reg;
1138 union {
1139 BitField<56, 1, u64> negate_b;
1140 BitField<54, 1, u64> abs_b;
1141 } cbuf;
1142 union {
1143 BitField<49, 4, PredCondition> cond;
1144 BitField<53, 1, u64> h_and;
1145 } cbuf_and_imm;
1146 BitField<42, 1, u64> neg_pred;
1147 BitField<39, 3, u64> pred39;
1148 } hsetp2;
1149
1150 union {
1151 BitField<40, 1, R2pMode> mode;
1152 BitField<41, 2, u64> byte;
1153 BitField<20, 7, u64> immediate_mask;
1154 } p2r_r2p;
1155
1156 union {
1157 BitField<39, 3, u64> pred39;
1158 BitField<42, 1, u64> neg_pred;
1159 BitField<43, 1, u64> neg_a;
1160 BitField<44, 1, u64> abs_b;
1161 BitField<45, 2, PredOperation> op;
1162 BitField<48, 4, PredCondition> cond;
1163 BitField<52, 1, u64> bf;
1164 BitField<53, 1, u64> neg_b;
1165 BitField<54, 1, u64> abs_a;
1166 BitField<55, 1, u64> ftz;
1167 } fset;
1168
1169 union {
1170 BitField<47, 1, u64> ftz;
1171 BitField<48, 4, PredCondition> cond;
1172 } fcmp;
1173
1174 union {
1175 BitField<49, 1, u64> bf;
1176 BitField<35, 3, PredCondition> cond;
1177 BitField<50, 1, u64> ftz;
1178 BitField<45, 2, PredOperation> op;
1179 BitField<43, 1, u64> negate_a;
1180 BitField<44, 1, u64> abs_a;
1181 BitField<47, 2, HalfType> type_a;
1182 BitField<31, 1, u64> negate_b;
1183 BitField<30, 1, u64> abs_b;
1184 BitField<28, 2, HalfType> type_b;
1185 BitField<42, 1, u64> neg_pred;
1186 BitField<39, 3, u64> pred39;
1187 } hset2;
1188
1189 union {
1190 BitField<39, 3, u64> pred39;
1191 BitField<42, 1, u64> neg_pred;
1192 BitField<44, 1, u64> bf;
1193 BitField<45, 2, PredOperation> op;
1194 BitField<48, 1, u64> is_signed;
1195 BitField<49, 3, PredCondition> cond;
1196 } iset;
1197
1198 union {
1199 BitField<45, 1, u64> negate_a;
1200 BitField<49, 1, u64> abs_a;
1201 BitField<10, 2, Register::Size> src_size;
1202 BitField<13, 1, u64> is_input_signed;
1203 BitField<8, 2, Register::Size> dst_size;
1204 BitField<12, 1, u64> is_output_signed;
1205
1206 union {
1207 BitField<39, 2, u64> tab5cb8_2;
1208 } i2f;
1209
1210 union {
1211 BitField<39, 2, F2iRoundingOp> rounding;
1212 } f2i;
1213
1214 union {
1215 BitField<39, 4, u64> rounding;
1216 // H0, H1 extract for F16 missing
1217 BitField<41, 1, u64> selector; // Guessed, since some games set it; TODO: reverse-engineer this value
1218 [[nodiscard]] F2fRoundingOp GetRoundingMode() const {
1219 constexpr u64 rounding_mask = 0x0B;
1220 return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
1221 }
1222 } f2f;
1223
1224 union {
1225 BitField<41, 2, u64> selector;
1226 } int_src;
1227
1228 union {
1229 BitField<41, 1, u64> selector;
1230 } float_src;
1231 } conversion;
1232
1233 union {
1234 BitField<28, 1, u64> array;
1235 BitField<29, 2, TextureType> texture_type;
1236 BitField<31, 4, u64> component_mask;
1237 BitField<49, 1, u64> nodep_flag;
1238 BitField<50, 1, u64> dc_flag;
1239 BitField<54, 1, u64> aoffi_flag;
1240 BitField<55, 3, TextureProcessMode> process_mode;
1241
1242 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1243 return ((1ULL << component) & component_mask) != 0;
1244 }
1245
1246 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1247 return process_mode;
1248 }
1249
1250 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1251 switch (mode) {
1252 case TextureMiscMode::DC:
1253 return dc_flag != 0;
1254 case TextureMiscMode::NODEP:
1255 return nodep_flag != 0;
1256 case TextureMiscMode::AOFFI:
1257 return aoffi_flag != 0;
1258 default:
1259 break;
1260 }
1261 return false;
1262 }
1263 } tex;
1264
1265 union {
1266 BitField<28, 1, u64> array;
1267 BitField<29, 2, TextureType> texture_type;
1268 BitField<31, 4, u64> component_mask;
1269 BitField<49, 1, u64> nodep_flag;
1270 BitField<50, 1, u64> dc_flag;
1271 BitField<36, 1, u64> aoffi_flag;
1272 BitField<37, 3, TextureProcessMode> process_mode;
1273
1274 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1275 return ((1ULL << component) & component_mask) != 0;
1276 }
1277
1278 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1279 return process_mode;
1280 }
1281
1282 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1283 switch (mode) {
1284 case TextureMiscMode::DC:
1285 return dc_flag != 0;
1286 case TextureMiscMode::NODEP:
1287 return nodep_flag != 0;
1288 case TextureMiscMode::AOFFI:
1289 return aoffi_flag != 0;
1290 default:
1291 break;
1292 }
1293 return false;
1294 }
1295 } tex_b;
1296
1297 union {
1298 BitField<22, 6, TextureQueryType> query_type;
1299 BitField<31, 4, u64> component_mask;
1300 BitField<49, 1, u64> nodep_flag;
1301
1302 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1303 switch (mode) {
1304 case TextureMiscMode::NODEP:
1305 return nodep_flag != 0;
1306 default:
1307 break;
1308 }
1309 return false;
1310 }
1311
1312 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1313 return ((1ULL << component) & component_mask) != 0;
1314 }
1315 } txq;
1316
1317 union {
1318 BitField<28, 1, u64> array;
1319 BitField<29, 2, TextureType> texture_type;
1320 BitField<31, 4, u64> component_mask;
1321 BitField<35, 1, u64> ndv_flag;
1322 BitField<49, 1, u64> nodep_flag;
1323
1324 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1325 return ((1ULL << component) & component_mask) != 0;
1326 }
1327
1328 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1329 switch (mode) {
1330 case TextureMiscMode::NDV:
1331 return (ndv_flag != 0);
1332 case TextureMiscMode::NODEP:
1333 return (nodep_flag != 0);
1334 default:
1335 break;
1336 }
1337 return false;
1338 }
1339 } tmml;
1340
1341 union {
1342 BitField<28, 1, u64> array;
1343 BitField<29, 2, TextureType> texture_type;
1344 BitField<35, 1, u64> ndv_flag;
1345 BitField<49, 1, u64> nodep_flag;
1346 BitField<50, 1, u64> dc_flag;
1347 BitField<54, 2, u64> offset_mode;
1348 BitField<56, 2, u64> component;
1349
1350 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1351 switch (mode) {
1352 case TextureMiscMode::NDV:
1353 return ndv_flag != 0;
1354 case TextureMiscMode::NODEP:
1355 return nodep_flag != 0;
1356 case TextureMiscMode::DC:
1357 return dc_flag != 0;
1358 case TextureMiscMode::AOFFI:
1359 return offset_mode == 1;
1360 case TextureMiscMode::PTP:
1361 return offset_mode == 2;
1362 default:
1363 break;
1364 }
1365 return false;
1366 }
1367 } tld4;
1368
1369 union {
1370 BitField<35, 1, u64> ndv_flag;
1371 BitField<49, 1, u64> nodep_flag;
1372 BitField<50, 1, u64> dc_flag;
1373 BitField<33, 2, u64> offset_mode;
1374 BitField<37, 2, u64> component;
1375
1376 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1377 switch (mode) {
1378 case TextureMiscMode::NDV:
1379 return ndv_flag != 0;
1380 case TextureMiscMode::NODEP:
1381 return nodep_flag != 0;
1382 case TextureMiscMode::DC:
1383 return dc_flag != 0;
1384 case TextureMiscMode::AOFFI:
1385 return offset_mode == 1;
1386 case TextureMiscMode::PTP:
1387 return offset_mode == 2;
1388 default:
1389 break;
1390 }
1391 return false;
1392 }
1393 } tld4_b;
1394
1395 union {
1396 BitField<49, 1, u64> nodep_flag;
1397 BitField<50, 1, u64> dc_flag;
1398 BitField<51, 1, u64> aoffi_flag;
1399 BitField<52, 2, u64> component;
1400 BitField<55, 1, u64> fp16_flag;
1401
1402 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1403 switch (mode) {
1404 case TextureMiscMode::DC:
1405 return dc_flag != 0;
1406 case TextureMiscMode::NODEP:
1407 return nodep_flag != 0;
1408 case TextureMiscMode::AOFFI:
1409 return aoffi_flag != 0;
1410 default:
1411 break;
1412 }
1413 return false;
1414 }
1415 } tld4s;
1416
1417 union {
1418 BitField<0, 8, Register> gpr0;
1419 BitField<28, 8, Register> gpr28;
1420 BitField<49, 1, u64> nodep_flag;
1421 BitField<50, 3, u64> component_mask_selector;
1422 BitField<53, 4, u64> texture_info;
1423 BitField<59, 1, u64> fp32_flag;
1424
1425 [[nodiscard]] TextureType GetTextureType() const {
1426 // The TEXS instruction has a weird encoding for the texture type.
1427 if (texture_info == 0) {
1428 return TextureType::Texture1D;
1429 }
1430 if (texture_info >= 1 && texture_info <= 9) {
1431 return TextureType::Texture2D;
1432 }
1433 if (texture_info >= 10 && texture_info <= 11) {
1434 return TextureType::Texture3D;
1435 }
1436 if (texture_info >= 12 && texture_info <= 13) {
1437 return TextureType::TextureCube;
1438 }
1439
1440 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
1441 UNREACHABLE();
1442 return TextureType::Texture1D;
1443 }
1444
1445 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1446 switch (texture_info) {
1447 case 0:
1448 case 2:
1449 case 6:
1450 case 8:
1451 case 9:
1452 case 11:
1453 return TextureProcessMode::LZ;
1454 case 3:
1455 case 5:
1456 case 13:
1457 return TextureProcessMode::LL;
1458 default:
1459 break;
1460 }
1461 return TextureProcessMode::None;
1462 }
1463
1464 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1465 switch (mode) {
1466 case TextureMiscMode::DC:
1467 return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
1468 case TextureMiscMode::NODEP:
1469 return nodep_flag != 0;
1470 default:
1471 break;
1472 }
1473 return false;
1474 }
1475
1476 [[nodiscard]] bool IsArrayTexture() const {
1477 // TEXS only supports Texture2D arrays.
1478 return texture_info >= 7 && texture_info <= 9;
1479 }
1480
1481 [[nodiscard]] bool HasTwoDestinations() const {
1482 return gpr28.Value() != Register::ZeroIndex;
1483 }
1484
1485 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1486 static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
1487 {},
1488 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
1489 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
1490 {0x7, 0xb, 0xd, 0xe, 0xf},
1491 }};
1492
1493 std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
1494 index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
1495
1496 u32 mask = mask_lut[index][component_mask_selector];
1497 // A mask of 0 means this instruction uses an unimplemented mask.
1498 ASSERT(mask != 0);
1499 return ((1ull << component) & mask) != 0;
1500 }
1501 } texs;
1502
1503 union {
1504 BitField<28, 1, u64> is_array;
1505 BitField<29, 2, TextureType> texture_type;
1506 BitField<35, 1, u64> aoffi;
1507 BitField<49, 1, u64> nodep_flag;
1508 BitField<50, 1, u64> ms; // Multisample?
1509 BitField<54, 1, u64> cl;
1510 BitField<55, 1, u64> process_mode;
1511
1512 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1513 return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
1514 }
1515 } tld;
1516
1517 union {
1518 BitField<49, 1, u64> nodep_flag;
1519 BitField<53, 4, u64> texture_info;
1520 BitField<59, 1, u64> fp32_flag;
1521
1522 [[nodiscard]] TextureType GetTextureType() const {
1523 // The TLDS instruction has a weird encoding for the texture type.
1524 if (texture_info <= 1) {
1525 return TextureType::Texture1D;
1526 }
1527 if (texture_info == 2 || texture_info == 8 || texture_info == 12 ||
1528 (texture_info >= 4 && texture_info <= 6)) {
1529 return TextureType::Texture2D;
1530 }
1531 if (texture_info == 7) {
1532 return TextureType::Texture3D;
1533 }
1534
1535 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
1536 UNREACHABLE();
1537 return TextureType::Texture1D;
1538 }
1539
1540 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1541 if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
1542 return TextureProcessMode::LL;
1543 }
1544 return TextureProcessMode::LZ;
1545 }
1546
1547 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1548 switch (mode) {
1549 case TextureMiscMode::AOFFI:
1550 return texture_info == 12 || texture_info == 4;
1551 case TextureMiscMode::MZ:
1552 return texture_info == 5;
1553 case TextureMiscMode::NODEP:
1554 return nodep_flag != 0;
1555 default:
1556 break;
1557 }
1558 return false;
1559 }
1560
1561 [[nodiscard]] bool IsArrayTexture() const {
1562 // TLDS only supports Texture2D arrays.
1563 return texture_info == 8;
1564 }
1565 } tlds;
1566
1567 union {
1568 BitField<28, 1, u64> is_array;
1569 BitField<29, 2, TextureType> texture_type;
1570 BitField<35, 1, u64> aoffi_flag;
1571 BitField<49, 1, u64> nodep_flag;
1572
1573 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1574 switch (mode) {
1575 case TextureMiscMode::AOFFI:
1576 return aoffi_flag != 0;
1577 case TextureMiscMode::NODEP:
1578 return nodep_flag != 0;
1579 default:
1580 break;
1581 }
1582 return false;
1583 }
1584
1585 } txd;
1586
1587 union {
1588 BitField<24, 2, StoreCacheManagement> cache_management;
1589 BitField<33, 3, ImageType> image_type;
1590 BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
1591 BitField<51, 1, u64> is_immediate;
1592 BitField<52, 1, SurfaceDataMode> mode;
1593
1594 BitField<20, 3, StoreType> store_data_layout;
1595 BitField<20, 4, u64> component_mask_selector;
1596
1597 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1598 ASSERT(mode == SurfaceDataMode::P);
1599 constexpr u8 R = 0b0001;
1600 constexpr u8 G = 0b0010;
1601 constexpr u8 B = 0b0100;
1602 constexpr u8 A = 0b1000;
1603 constexpr std::array<u8, 16> mask = {
1604 0, (R), (G), (R | G), (B), (R | B),
1605 (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
1606 (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
1607 return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
1608 }
1609
1610 [[nodiscard]] StoreType GetStoreDataLayout() const {
1611 ASSERT(mode == SurfaceDataMode::D_BA);
1612 return store_data_layout;
1613 }
1614 } suldst;
1615
1616 union {
1617 BitField<28, 1, u64> is_ba;
1618 BitField<51, 3, ImageAtomicOperationType> operation_type;
1619 BitField<33, 3, ImageType> image_type;
1620 BitField<29, 4, ImageAtomicOperation> operation;
1621 BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
1622 } suatom_d;
1623
1624 union {
1625 BitField<20, 24, u64> target;
1626 BitField<5, 1, u64> constant_buffer;
1627
1628 [[nodiscard]] s32 GetBranchTarget() const {
1629 // Sign extend the branch target offset
1630 const auto mask = 1U << (24 - 1);
1631 const auto target_value = static_cast<u32>(target);
1632 constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
1633
1634 // The branch offset is relative to the next instruction and is stored in bytes, so
1635 // divide it by the size of an instruction and add 1 to it.
1636 return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
1637 }
1638 } bra;
1639
1640 union {
1641 BitField<20, 24, u64> target;
1642 BitField<5, 1, u64> constant_buffer;
1643
1644 [[nodiscard]] s32 GetBranchExtend() const {
1645 // Sign extend the branch target offset
1646 const auto mask = 1U << (24 - 1);
1647 const auto target_value = static_cast<u32>(target);
1648 constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
1649
1650 // The branch offset is relative to the next instruction and is stored in bytes, so
1651 // divide it by the size of an instruction and add 1 to it.
1652 return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
1653 }
1654 } brx;
1655
1656 union {
1657 BitField<39, 1, u64> emit; // EmitVertex
1658 BitField<40, 1, u64> cut; // EndPrimitive
1659 } out;
1660
1661 union {
1662 BitField<31, 1, u64> skew;
1663 BitField<32, 1, u64> o;
1664 BitField<33, 2, IsberdMode> mode;
1665 BitField<47, 2, IsberdShift> shift;
1666 } isberd;
1667
1668 union {
1669 BitField<8, 2, MembarType> type;
1670 BitField<0, 2, MembarUnknown> unknown;
1671 } membar;
1672
1673 union {
1674 BitField<48, 1, u64> signed_a;
1675 BitField<38, 1, u64> is_byte_chunk_a;
1676 BitField<36, 2, VideoType> type_a;
1677 BitField<36, 2, u64> byte_height_a;
1678
1679 BitField<49, 1, u64> signed_b;
1680 BitField<50, 1, u64> use_register_b;
1681 BitField<30, 1, u64> is_byte_chunk_b;
1682 BitField<28, 2, VideoType> type_b;
1683 BitField<28, 2, u64> byte_height_b;
1684 } video;
1685
1686 union {
1687 BitField<51, 2, VmadShr> shr;
1688 BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
1689 BitField<47, 1, u64> cc;
1690 } vmad;
1691
1692 union {
1693 BitField<54, 1, u64> is_dest_signed;
1694 BitField<48, 1, u64> is_src_a_signed;
1695 BitField<49, 1, u64> is_src_b_signed;
1696 BitField<37, 2, u64> src_format_a;
1697 BitField<29, 2, u64> src_format_b;
1698 BitField<56, 1, u64> mx;
1699 BitField<55, 1, u64> sat;
1700 BitField<36, 2, u64> selector_a;
1701 BitField<28, 2, u64> selector_b;
1702 BitField<50, 1, u64> is_op_b_register;
1703 BitField<51, 3, VmnmxOperation> operation;
1704
1705 [[nodiscard]] VmnmxType SourceFormatA() const {
1706 switch (src_format_a) {
1707 case 0b11:
1708 return VmnmxType::Bits32;
1709 case 0b10:
1710 return VmnmxType::Bits16;
1711 default:
1712 return VmnmxType::Bits8;
1713 }
1714 }
1715
1716 [[nodiscard]] VmnmxType SourceFormatB() const {
1717 switch (src_format_b) {
1718 case 0b11:
1719 return VmnmxType::Bits32;
1720 case 0b10:
1721 return VmnmxType::Bits16;
1722 default:
1723 return VmnmxType::Bits8;
1724 }
1725 }
1726 } vmnmx;
1727
1728 union {
1729 BitField<20, 16, u64> imm20_16;
1730 BitField<35, 1, u64> high_b_rr; // used on RR
1731 BitField<36, 1, u64> product_shift_left;
1732 BitField<37, 1, u64> merge_37;
1733 BitField<48, 1, u64> sign_a;
1734 BitField<49, 1, u64> sign_b;
1735 BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
1736 BitField<50, 3, XmadMode> mode;
1737 BitField<52, 1, u64> high_b;
1738 BitField<53, 1, u64> high_a;
1739 BitField<55, 1, u64> product_shift_left_second; // used on CR
1740 BitField<56, 1, u64> merge_56;
1741 } xmad;
1742
1743 union {
1744 BitField<20, 14, u64> shifted_offset;
1745 BitField<34, 5, u64> index;
1746
1747 [[nodiscard]] u64 GetOffset() const {
1748 return shifted_offset * 4;
1749 }
1750 } cbuf34;
1751
1752 union {
1753 BitField<20, 16, s64> offset;
1754 BitField<36, 5, u64> index;
1755
1756 [[nodiscard]] s64 GetOffset() const {
1757 return offset;
1758 }
1759 } cbuf36;
1760
1761 // Unsure about the size of this one.
1762 // It's always used with a gpr0, so any size should be fine.
1763 BitField<20, 8, SystemVariable> sys20;
1764
1765 BitField<47, 1, u64> generates_cc;
1766 BitField<61, 1, u64> is_b_imm;
1767 BitField<60, 1, u64> is_b_gpr;
1768 BitField<59, 1, u64> is_c_gpr;
1769 BitField<20, 24, s64> smem_imm;
1770 BitField<0, 5, ConditionCode> flow_condition_code;
1771
1772 Attribute attribute;
1773 Sampler sampler;
1774 Image image;
1775
1776 u64 value;
1777};
1778static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
1779static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout");
1780
1781class OpCode {
1782public:
1783 enum class Id {
1784 KIL,
1785 SSY,
1786 SYNC,
1787 BRK,
1788 DEPBAR,
1789 VOTE,
1790 VOTE_VTG,
1791 SHFL,
1792 FSWZADD,
1793 BFE_C,
1794 BFE_R,
1795 BFE_IMM,
1796 BFI_RC,
1797 BFI_IMM_R,
1798 BRA,
1799 BRX,
1800 PBK,
1801 LD_A,
1802 LD_L,
1803 LD_S,
1804 LD_C,
1805 LD, // Load from generic memory
1806 LDG, // Load from global memory
1807 ST_A,
1808 ST_L,
1809 ST_S,
1810 ST, // Store in generic memory
1811 STG, // Store in global memory
1812 RED, // Reduction operation
1813 ATOM, // Atomic operation on global memory
1814 ATOMS, // Atomic operation on shared memory
1815 AL2P, // Transforms attribute memory into physical memory
1816 TEX,
1817 TEX_B, // Texture Load Bindless
1818 TXQ, // Texture Query
1819 TXQ_B, // Texture Query Bindless
1820 TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
1821 TLD, // Texture Load
1822 TLDS, // Texture Load with scalar/non-vec4 source/destinations
1823 TLD4, // Texture Gather 4
1824 TLD4_B, // Texture Gather 4 Bindless
1825        TLD4S,   // Texture Load 4 with scalar/non-vec4 source/destinations
1826 TMML_B, // Texture Mip Map Level
1827 TMML, // Texture Mip Map Level
1828        TXD,     // Texture Gradient/Load with Derivatives
1829        TXD_B,   // Texture Gradient/Load with Derivatives Bindless
1830 SUST, // Surface Store
1831 SULD, // Surface Load
1832 SUATOM, // Surface Atomic Operation
1833 EXIT,
1834 NOP,
1835 IPA,
1836 OUT_R, // Emit vertex/primitive
1837 ISBERD,
1838 BAR,
1839 MEMBAR,
1840 VMAD,
1841 VSETP,
1842 VMNMX,
1843 FFMA_IMM, // Fused Multiply and Add
1844 FFMA_CR,
1845 FFMA_RC,
1846 FFMA_RR,
1847 FADD_C,
1848 FADD_R,
1849 FADD_IMM,
1850 FADD32I,
1851 FMUL_C,
1852 FMUL_R,
1853 FMUL_IMM,
1854 FMUL32_IMM,
1855 IADD_C,
1856 IADD_R,
1857 IADD_IMM,
1858 IADD3_C, // Add 3 Integers
1859 IADD3_R,
1860 IADD3_IMM,
1861 IADD32I,
1862 ISCADD_C, // Scale and Add
1863 ISCADD_R,
1864 ISCADD_IMM,
1865 FLO_R,
1866 FLO_C,
1867 FLO_IMM,
1868 LEA_R1,
1869 LEA_R2,
1870 LEA_RZ,
1871 LEA_IMM,
1872 LEA_HI,
1873 HADD2_C,
1874 HADD2_R,
1875 HADD2_IMM,
1876 HMUL2_C,
1877 HMUL2_R,
1878 HMUL2_IMM,
1879 HFMA2_CR,
1880 HFMA2_RC,
1881 HFMA2_RR,
1882 HFMA2_IMM_R,
1883 HSETP2_C,
1884 HSETP2_R,
1885 HSETP2_IMM,
1886 HSET2_C,
1887 HSET2_R,
1888 HSET2_IMM,
1889 POPC_C,
1890 POPC_R,
1891 POPC_IMM,
1892 SEL_C,
1893 SEL_R,
1894 SEL_IMM,
1895 ICMP_RC,
1896 ICMP_R,
1897 ICMP_CR,
1898 ICMP_IMM,
1899 FCMP_RR,
1900 FCMP_RC,
1901 FCMP_IMMR,
1902 MUFU, // Multi-Function Operator
1903 RRO_C, // Range Reduction Operator
1904 RRO_R,
1905 RRO_IMM,
1906 F2F_C,
1907 F2F_R,
1908 F2F_IMM,
1909 F2I_C,
1910 F2I_R,
1911 F2I_IMM,
1912 I2F_C,
1913 I2F_R,
1914 I2F_IMM,
1915 I2I_C,
1916 I2I_R,
1917 I2I_IMM,
1918 LOP_C,
1919 LOP_R,
1920 LOP_IMM,
1921 LOP32I,
1922 LOP3_C,
1923 LOP3_R,
1924 LOP3_IMM,
1925 MOV_C,
1926 MOV_R,
1927 MOV_IMM,
1928 S2R,
1929 MOV32_IMM,
1930 SHL_C,
1931 SHL_R,
1932 SHL_IMM,
1933 SHR_C,
1934 SHR_R,
1935 SHR_IMM,
1936 SHF_RIGHT_R,
1937 SHF_RIGHT_IMM,
1938 SHF_LEFT_R,
1939 SHF_LEFT_IMM,
1940 FMNMX_C,
1941 FMNMX_R,
1942 FMNMX_IMM,
1943 IMNMX_C,
1944 IMNMX_R,
1945 IMNMX_IMM,
1946 FSETP_C, // Set Predicate
1947 FSETP_R,
1948 FSETP_IMM,
1949 FSET_C,
1950 FSET_R,
1951 FSET_IMM,
1952 ISETP_C,
1953 ISETP_IMM,
1954 ISETP_R,
1955 ISET_R,
1956 ISET_C,
1957 ISET_IMM,
1958 PSETP,
1959 PSET,
1960 CSETP,
1961 R2P_IMM,
1962 P2R_IMM,
1963 XMAD_IMM,
1964 XMAD_CR,
1965 XMAD_RC,
1966 XMAD_RR,
1967 };
1968
1969 enum class Type {
1970 Trivial,
1971 Arithmetic,
1972 ArithmeticImmediate,
1973 ArithmeticInteger,
1974 ArithmeticIntegerImmediate,
1975 ArithmeticHalf,
1976 ArithmeticHalfImmediate,
1977 Bfe,
1978 Bfi,
1979 Shift,
1980 Ffma,
1981 Hfma2,
1982 Flow,
1983 Synch,
1984 Warp,
1985 Memory,
1986 Texture,
1987 Image,
1988 FloatSet,
1989 FloatSetPredicate,
1990 IntegerSet,
1991 IntegerSetPredicate,
1992 HalfSet,
1993 HalfSetPredicate,
1994 PredicateSetPredicate,
1995 PredicateSetRegister,
1996 RegisterSetPredicate,
1997 Conversion,
1998 Video,
1999 Xmad,
2000 Unknown,
2001 };
2002
2003    /// Returns whether an opcode has an execution predicate field or not (i.e., whether it can be
2004 /// conditionally executed).
2005 [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
2006 // TODO(Subv): Add the rest of unpredicated instructions.
2007 return opcode != Id::SSY && opcode != Id::PBK;
2008 }
2009
2010 class Matcher {
2011 public:
2012 constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
2013 : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
2014
2015 [[nodiscard]] constexpr const char* GetName() const {
2016 return name;
2017 }
2018
2019 [[nodiscard]] constexpr u16 GetMask() const {
2020 return mask;
2021 }
2022
2023 [[nodiscard]] constexpr Id GetId() const {
2024 return id;
2025 }
2026
2027 [[nodiscard]] constexpr Type GetType() const {
2028 return type;
2029 }
2030
2031 /**
2032 * Tests to see if the given instruction is the instruction this matcher represents.
2033 * @param instruction The instruction to test
2034 * @returns true if the given instruction matches.
2035 */
2036 [[nodiscard]] constexpr bool Matches(u16 instruction) const {
2037 return (instruction & mask) == expected;
2038 }
2039
2040 private:
2041 const char* name;
2042 u16 mask;
2043 u16 expected;
2044 Id id;
2045 Type type;
2046 };
2047
2048 using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
2049 [[nodiscard]] static DecodeResult Decode(Instruction instr) {
2050 static const auto table{GetDecodeTable()};
2051
2052 const auto matches_instruction = [instr](const auto& matcher) {
2053 return matcher.Matches(static_cast<u16>(instr.opcode));
2054 };
2055
2056 auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
2057 return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
2058 : std::nullopt;
2059 }
2060
2061private:
2062 struct Detail {
2063 private:
2064 static constexpr std::size_t opcode_bitsize = 16;
2065
2066 /**
2067 * Generates the mask and the expected value after masking from a given bitstring.
2068 * A '0' in a bitstring indicates that a zero must be present at that bit position.
2069 * A '1' in a bitstring indicates that a one must be present at that bit position.
2070 */
2071 [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
2072 u16 mask = 0, expect = 0;
2073 for (std::size_t i = 0; i < opcode_bitsize; i++) {
2074 const std::size_t bit_position = opcode_bitsize - i - 1;
2075 switch (bitstring[i]) {
2076 case '0':
2077 mask |= static_cast<u16>(1U << bit_position);
2078 break;
2079 case '1':
2080 expect |= static_cast<u16>(1U << bit_position);
2081 mask |= static_cast<u16>(1U << bit_position);
2082 break;
2083 default:
2084 // Ignore
2085 break;
2086 }
2087 }
2088 return std::make_pair(mask, expect);
2089 }
2090
2091 public:
2092        /// Creates a matcher that can match and parse instructions based on a bitstring.
2093 [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
2094 Type type, const char* const name) {
2095 const auto [mask, expected] = GetMaskAndExpect(bitstring);
2096 return Matcher(name, mask, expected, op, type);
2097 }
2098 };
2099
2100 [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
2101 std::vector<Matcher> table = {
2102#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
2103 INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
2104 INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
2105 INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
2106 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
2107 INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
2108 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
2109 INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
2110 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
2111 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
2112 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
2113 INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
2114 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
2115 INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
2116 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
2117 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
2118 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
2119 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
2120 INST("100-------------", Id::LD, Type::Memory, "LD"),
2121 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
2122 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
2123 INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
2124 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
2125 INST("101-------------", Id::ST, Type::Memory, "ST"),
2126 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
2127 INST("1110101111111---", Id::RED, Type::Memory, "RED"),
2128 INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
2129 INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
2130 INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
2131 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
2132 INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
2133 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
2134 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
2135 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
2136 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
2137 INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
2138 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
2139 INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
2140 INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
2141 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
2142 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
2143 INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
2144 INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
2145 INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
2146 INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
2147 INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
2148 INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
2149 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
2150 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
2151 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
2152 INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
2153 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
2154 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
2155 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
2156 INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
2157 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
2158 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
2159 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
2160 INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
2161 INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
2162 INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
2163 INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
2164 INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"),
2165 INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
2166 INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
2167 INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
2168 INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
2169 INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
2170 INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
2171 INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
2172 INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"),
2173 INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"),
2174 INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"),
2175 INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
2176 INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
2177 INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
2178 INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
2179 INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"),
2180 INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"),
2181 INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"),
2182 INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
2183 INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
2184 INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
2185 INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"),
2186 INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
2187 INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
2188 INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
2189 INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
2190 INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
2191 INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
2192 INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
2193 INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
2194 INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
2195 INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
2196 INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
2197 INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"),
2198 INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"),
2199 INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"),
2200 INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"),
2201 INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"),
2202 INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"),
2203 INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"),
2204 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
2205 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
2206 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
2207 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
2208 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
2209 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
2210 INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
2211 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
2212 INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
2213 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
2214 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
2215 INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
2216 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
2217 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
2218 INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
2219 INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"),
2220 INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
2221 INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
2222 INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
2223 INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
2224 INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
2225 INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
2226 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
2227 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
2228 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
2229 INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
2230 INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
2231 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
2232 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
2233 INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
2234 INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"),
2235 INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"),
2236 INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"),
2237 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
2238 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
2239 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
2240 INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"),
2241 INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
2242 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
2243 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
2244 INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
2245 INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
2246 INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
2247 INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
2248 INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"),
2249 INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
2250 INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
2251 INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
2252 INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
2253 INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
2254 INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
2255 INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
2256 INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
2257 INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
2258 INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
2259 INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
2260 INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
2261 INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
2262 INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
2263 INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
2264 INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
2265 INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"),
2266 INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"),
2267 INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"),
2268 INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"),
2269 INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"),
2270 INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"),
2271 INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
2272 INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
2273 INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
2274 INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
2275 INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
2276 INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
2277 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
2278 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
2279 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
2280 INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
2281 INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
2282 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
2283 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
2284 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
2285 INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
2286 };
2287#undef INST
2288 std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
2289 // If a matcher has more bits in its mask it is more specific, so it
2290 // should come first.
2291 return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count();
2292 });
2293
2294 return table;
2295 }
2296};
2297
2298} // namespace Tegra::Shader
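
The decode table removed above is the heart of the legacy disassembler: every INST entry pairs a 16-bit bitstring with an opcode Id, GetMaskAndExpect turns '0'/'1' characters into fixed bits and '-' into wildcards, and the stable sort keeps the more specific patterns (more mask bits set) ahead of the broader ones so they win the find_if in Decode. A minimal, self-contained C++ sketch of that matching scheme (Pattern and MakePattern are illustrative names, not yuzu code):

    #include <cstdint>
    #include <cstdio>

    struct Pattern {
        std::uint16_t mask = 0;   // 1 where the pattern fixes the bit
        std::uint16_t expect = 0; // required value of the fixed bits
    };

    constexpr Pattern MakePattern(const char* bits) {
        Pattern p{};
        for (int i = 0; i < 16; ++i) {
            const std::uint16_t bit = static_cast<std::uint16_t>(1u << (15 - i));
            if (bits[i] == '0') {
                p.mask |= bit;               // this bit must be 0
            } else if (bits[i] == '1') {
                p.mask |= bit;
                p.expect |= bit;             // this bit must be 1
            }                                // '-' leaves the bit unconstrained
        }
        return p;
    }

    int main() {
        // Bitstring taken from the IADD_IMM entry of the deleted table.
        constexpr Pattern iadd_imm = MakePattern("0011100-00010---");
        const std::uint16_t opcode = 0b0011100100010110; // wildcard bits may be anything
        std::printf("matches: %d\n", (opcode & iadd_imm.mask) == iadd_imm.expect);
        return 0;
    }
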
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
deleted file mode 100644
index e0d7b89c5..000000000
--- a/src/video_core/engines/shader_header.h
+++ /dev/null
@@ -1,158 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13
14namespace Tegra::Shader {
15
16enum class OutputTopology : u32 {
17 PointList = 1,
18 LineStrip = 6,
19 TriangleStrip = 7,
20};
21
22enum class PixelImap : u8 {
23 Unused = 0,
24 Constant = 1,
25 Perspective = 2,
26 ScreenLinear = 3,
27};
28
29// Documentation in:
30// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
31struct Header {
32 union {
33 BitField<0, 5, u32> sph_type;
34 BitField<5, 5, u32> version;
35 BitField<10, 4, u32> shader_type;
36 BitField<14, 1, u32> mrt_enable;
37 BitField<15, 1, u32> kills_pixels;
38 BitField<16, 1, u32> does_global_store;
39 BitField<17, 4, u32> sass_version;
40 BitField<21, 5, u32> reserved;
41 BitField<26, 1, u32> does_load_or_store;
42 BitField<27, 1, u32> does_fp64;
43 BitField<28, 4, u32> stream_out_mask;
44 } common0;
45
46 union {
47 BitField<0, 24, u32> shader_local_memory_low_size;
48 BitField<24, 8, u32> per_patch_attribute_count;
49 } common1;
50
51 union {
52 BitField<0, 24, u32> shader_local_memory_high_size;
53 BitField<24, 8, u32> threads_per_input_primitive;
54 } common2;
55
56 union {
57 BitField<0, 24, u32> shader_local_memory_crs_size;
58 BitField<24, 4, OutputTopology> output_topology;
59 BitField<28, 4, u32> reserved;
60 } common3;
61
62 union {
63 BitField<0, 12, u32> max_output_vertices;
64 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
65 BitField<20, 4, u32> reserved;
66 BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
67 } common4;
68
69 union {
70 struct {
71 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
72 INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
73 INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
74 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
75 union {
76 BitField<0, 8, u16> clip_distances;
77 BitField<8, 1, u16> point_sprite_s;
78 BitField<9, 1, u16> point_sprite_t;
79 BitField<10, 1, u16> fog_coordinate;
80 BitField<12, 1, u16> tessellation_eval_point_u;
81 BitField<13, 1, u16> tessellation_eval_point_v;
82 BitField<14, 1, u16> instance_id;
83 BitField<15, 1, u16> vertex_id;
84 };
85 INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
86 INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
87 INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
88 INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
89 INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
90 INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
91 INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
92 INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
93 INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
94 } vtg;
95
96 struct {
97 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
98 INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
99
100 union {
101 BitField<0, 2, PixelImap> x;
102 BitField<2, 2, PixelImap> y;
103 BitField<4, 2, PixelImap> z;
104 BitField<6, 2, PixelImap> w;
105 u8 raw;
106 } imap_generic_vector[32];
107
108 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
109 INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
110 INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
111 INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
112
113 struct {
114 u32 target;
115 union {
116 BitField<0, 1, u32> sample_mask;
117 BitField<1, 1, u32> depth;
118 BitField<2, 30, u32> reserved;
119 };
120 } omap;
121
122 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
123 const u32 bit = render_target * 4 + component;
124 return omap.target & (1 << bit);
125 }
126
127 PixelImap GetPixelImap(u32 attribute) const {
128 const auto get_index = [this, attribute](u32 index) {
129 return static_cast<PixelImap>(
130 (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
131 };
132
133 std::optional<PixelImap> result;
134 for (u32 component = 0; component < 4; ++component) {
135 const PixelImap index = get_index(component);
136 if (index == PixelImap::Unused) {
137 continue;
138 }
139 if (result && result != index) {
140 LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
141 }
142 result = index;
143 }
144 return result.value_or(PixelImap::Unused);
145 }
146 } ps;
147
148 std::array<u32, 0xF> raw;
149 };
150
151 u64 GetLocalMemorySize() const {
152 return (common1.shader_local_memory_low_size |
153 (common2.shader_local_memory_high_size << 24));
154 }
155};
156static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
157
158} // namespace Tegra::Shader
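
GetLocalMemorySize in the header removed above splits the per-thread local memory size across two SPH words: common1 carries the low 24 bits and common2 carries the high bits, which are shifted up by 24 before being ORed in. A small sketch of that packing, widened to 64 bits here to sidestep overflow questions (the names are illustrative, not yuzu code):

    #include <cstdint>
    #include <cstdio>

    std::uint64_t LocalMemorySize(std::uint32_t low_24_bits, std::uint32_t high_bits) {
        // Same recombination as Header::GetLocalMemorySize above.
        return static_cast<std::uint64_t>(low_24_bits) |
               (static_cast<std::uint64_t>(high_bits) << 24);
    }

    int main() {
        // low = 0x1000, high = 0 -> 4 KiB of local memory per thread.
        std::printf("0x%llx bytes\n",
                    static_cast<unsigned long long>(LocalMemorySize(0x1000, 0)));
        return 0;
    }
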
diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h
deleted file mode 100644
index 49ce5cde5..000000000
--- a/src/video_core/engines/shader_type.h
+++ /dev/null
@@ -1,21 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Tegra::Engines {
10
11enum class ShaderType : u32 {
12 Vertex = 0,
13 TesselationControl = 1,
14 TesselationEval = 2,
15 Geometry = 3,
16 Fragment = 4,
17 Compute = 5,
18};
19static constexpr std::size_t MaxShaderTypes = 6;
20
21} // namespace Tegra::Engines
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
deleted file mode 100644
index f058f2744..000000000
--- a/src/video_core/guest_driver.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <limits>
7#include <vector>
8
9#include "common/common_types.h"
10#include "video_core/guest_driver.h"
11
12namespace VideoCore {
13
14void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
15 if (texture_handler_size) {
16 return;
17 }
18 const std::size_t size = bound_offsets.size();
19 if (size < 2) {
20 return;
21 }
22 std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
23 u32 min_val = std::numeric_limits<u32>::max();
24 for (std::size_t i = 1; i < size; ++i) {
25 if (bound_offsets[i] == bound_offsets[i - 1]) {
26 continue;
27 }
28 const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
29 min_val = std::min(min_val, new_min);
30 }
31 if (min_val > 2) {
32 return;
33 }
34 texture_handler_size = min_texture_handler_size * min_val;
35}
36
37} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
deleted file mode 100644
index 21e569ba1..000000000
--- a/src/video_core/guest_driver.h
+++ /dev/null
@@ -1,46 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <vector>
9
10#include "common/common_types.h"
11
12namespace VideoCore {
13
14/**
15 * The GuestDriverProfile class is used to learn about the guest GPU driver's behavior and to
16 * collect information needed by HLE methods that are impossible to avoid, such as shader
17 * tracking, since they are Entscheidungsprobleme (they cannot be decided in general).
18 */
19class GuestDriverProfile {
20public:
21 explicit GuestDriverProfile() = default;
22 explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
23 : texture_handler_size{texture_handler_size_} {}
24
25 void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
26
27 u32 GetTextureHandlerSize() const {
28 return texture_handler_size.value_or(default_texture_handler_size);
29 }
30
31 bool IsTextureHandlerSizeKnown() const {
32 return texture_handler_size.has_value();
33 }
34
35private:
36    // Minimum size of a texture handler that any driver can use.
37 static constexpr u32 min_texture_handler_size = 4;
38
39    // This matches the Vulkan and OpenGL standards, but Nvidia GPUs can easily use 4 bytes instead.
40    // Thus, certain drivers may shrink the size.
41 static constexpr u32 default_texture_handler_size = 8;
42
43 std::optional<u32> texture_handler_size = default_texture_handler_size;
44};
45
46} // namespace VideoCore
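
Taken together, the two guest_driver files removed above let a rasterizer feed DeduceTextureHandlerSize the constant-buffer offsets at which texture handles were read; the smallest distance between consecutive offsets (1 or 2) then fixes the handler stride at 4 or 8 bytes. A hedged usage sketch against the removed interface, assuming the offsets are expressed in 32-bit words as the 4-byte minimum suggests:

    #include <cstdio>
    #include <optional>

    #include "video_core/guest_driver.h"

    int main() {
        // Start with an unknown size so the deduction actually runs; a
        // default-constructed profile already holds the 8-byte default.
        VideoCore::GuestDriverProfile profile{std::nullopt};

        // Handles read two words apart -> minimum stride 2 -> 4 * 2 = 8 bytes.
        profile.DeduceTextureHandlerSize({8, 10, 12});

        std::printf("texture handler size: %u bytes (known: %d)\n",
                    profile.GetTextureHandlerSize(),
                    profile.IsTextureHandlerSizeKnown() ? 1 : 0);
        return 0;
    }
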
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index d2b9d5f2b..882eff880 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,7 +69,6 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
69 } else { 69 } else {
70 UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); 70 UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
71 } 71 }
72
73 const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); 72 const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
74 73
75 for (const auto& map : submapped_ranges) { 74 for (const auto& map : submapped_ranges) {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 58014c1c3..b094fc064 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -11,7 +11,6 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/fermi_2d.h" 12#include "video_core/engines/fermi_2d.h"
13#include "video_core/gpu.h" 13#include "video_core/gpu.h"
14#include "video_core/guest_driver.h"
15 14
16namespace Tegra { 15namespace Tegra {
17class MemoryManager; 16class MemoryManager;
@@ -45,7 +44,7 @@ public:
45 virtual void Clear() = 0; 44 virtual void Clear() = 0;
46 45
47 /// Dispatches a compute shader invocation 46 /// Dispatches a compute shader invocation
48 virtual void DispatchCompute(GPUVAddr code_addr) = 0; 47 virtual void DispatchCompute() = 0;
49 48
50 /// Resets the counter of a query 49 /// Resets the counter of a query
51 virtual void ResetCounter(QueryType type) = 0; 50 virtual void ResetCounter(QueryType type) = 0;
@@ -136,18 +135,5 @@ public:
136 /// Initialize disk cached resources for the game being emulated 135 /// Initialize disk cached resources for the game being emulated
137 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 136 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
138 const DiskResourceLoadCallback& callback) {} 137 const DiskResourceLoadCallback& callback) {}
139
140 /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
141 [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
142 return guest_driver_profile;
143 }
144
145 /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
146 [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
147 return guest_driver_profile;
148 }
149
150private:
151 GuestDriverProfile guest_driver_profile{};
152}; 138};
153} // namespace VideoCore 139} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
deleted file mode 100644
index e8d8d2aa5..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ /dev/null
@@ -1,2124 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <variant>
12
13#include <fmt/format.h>
14
15#include "common/alignment.h"
16#include "common/assert.h"
17#include "common/common_types.h"
18#include "video_core/renderer_opengl/gl_arb_decompiler.h"
19#include "video_core/renderer_opengl/gl_device.h"
20#include "video_core/shader/registry.h"
21#include "video_core/shader/shader_ir.h"
22
23// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
24// GLASM lacks booleans, so they have to be implemented as integers.
25// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
26// select between two values, because -1 will be evaluated as true and 0 as false.
27
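
Since GLASM has no boolean type, every predicate in the deleted decompiler is an integer that is -1 when true and 0 when false. Sketched in C++ purely for illustration, that convention composes directly with the bitwise instructions the decompiler emits:

    #include <cstdint>
    #include <cstdio>

    using Bool32 = std::int32_t;       // -1 = true, 0 = false
    constexpr Bool32 TRUE32 = -1;
    constexpr Bool32 FALSE32 = 0;

    int main() {
        // NOT.U flips every bit, so it also flips the truth value: ~-1 == 0, ~0 == -1.
        std::printf("%d %d\n", ~TRUE32, ~FALSE32);
        // AND/OR on the -1/0 masks behave like logical AND/OR on the predicates.
        std::printf("%d %d\n", TRUE32 & FALSE32, TRUE32 | FALSE32);
        // CMP.S selects its first source when the condition is negative, so a
        // -1/0 predicate picks between two values much like a ternary does here.
        const Bool32 cond = TRUE32;
        std::printf("%d\n", cond < 0 ? 10 : 20);
        return 0;
    }
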
28namespace OpenGL {
29
30namespace {
31
32using Tegra::Engines::ShaderType;
33using Tegra::Shader::Attribute;
34using Tegra::Shader::PixelImap;
35using Tegra::Shader::Register;
36using namespace VideoCommon::Shader;
37using Operation = const OperationNode&;
38
39constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
40
41char Swizzle(std::size_t component) {
42 static constexpr std::string_view SWIZZLE{"xyzw"};
43 return SWIZZLE.at(component);
44}
45
46constexpr bool IsGenericAttribute(Attribute::Index index) {
47 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
48}
49
50u32 GetGenericAttributeIndex(Attribute::Index index) {
51 ASSERT(IsGenericAttribute(index));
52 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
53}
54
55std::string_view Modifiers(Operation operation) {
56 const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
57 if (meta && meta->precise) {
58 return ".PREC";
59 }
60 return "";
61}
62
63std::string_view GetInputFlags(PixelImap attribute) {
64 switch (attribute) {
65 case PixelImap::Perspective:
66 return "";
67 case PixelImap::Constant:
68 return "FLAT ";
69 case PixelImap::ScreenLinear:
70 return "NOPERSPECTIVE ";
71 case PixelImap::Unused:
72 break;
73 }
74 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
75 return {};
76}
77
78std::string_view ImageType(Tegra::Shader::ImageType image_type) {
79 switch (image_type) {
80 case Tegra::Shader::ImageType::Texture1D:
81 return "1D";
82 case Tegra::Shader::ImageType::TextureBuffer:
83 return "BUFFER";
84 case Tegra::Shader::ImageType::Texture1DArray:
85 return "ARRAY1D";
86 case Tegra::Shader::ImageType::Texture2D:
87 return "2D";
88 case Tegra::Shader::ImageType::Texture2DArray:
89 return "ARRAY2D";
90 case Tegra::Shader::ImageType::Texture3D:
91 return "3D";
92 }
93 UNREACHABLE();
94 return {};
95}
96
97std::string_view StackName(MetaStackClass stack) {
98 switch (stack) {
99 case MetaStackClass::Ssy:
100 return "SSY";
101 case MetaStackClass::Pbk:
102 return "PBK";
103 }
104 UNREACHABLE();
105 return "";
106};
107
108std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
109 switch (topology) {
110 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
111 return "POINTS";
112 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
113 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
114 return "LINES";
115 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
116 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
117 return "LINES_ADJACENCY";
118 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
119 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
120 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
121 return "TRIANGLES";
122 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
123 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
124 return "TRIANGLES_ADJACENCY";
125 default:
126 UNIMPLEMENTED_MSG("topology={}", topology);
127 return "POINTS";
128 }
129}
130
131std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
132 switch (topology) {
133 case Tegra::Shader::OutputTopology::PointList:
134 return "POINTS";
135 case Tegra::Shader::OutputTopology::LineStrip:
136 return "LINE_STRIP";
137 case Tegra::Shader::OutputTopology::TriangleStrip:
138 return "TRIANGLE_STRIP";
139 default:
140 UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
141 return "points";
142 }
143}
144
145std::string_view StageInputName(ShaderType stage) {
146 switch (stage) {
147 case ShaderType::Vertex:
148 case ShaderType::Geometry:
149 return "vertex";
150 case ShaderType::Fragment:
151 return "fragment";
152 case ShaderType::Compute:
153 return "invocation";
154 default:
155 UNREACHABLE();
156 return "";
157 }
158}
159
160std::string TextureType(const MetaTexture& meta) {
161 if (meta.sampler.is_buffer) {
162 return "BUFFER";
163 }
164 std::string type;
165 if (meta.sampler.is_shadow) {
166 type += "SHADOW";
167 }
168 if (meta.sampler.is_array) {
169 type += "ARRAY";
170 }
171 type += [&meta] {
172 switch (meta.sampler.type) {
173 case Tegra::Shader::TextureType::Texture1D:
174 return "1D";
175 case Tegra::Shader::TextureType::Texture2D:
176 return "2D";
177 case Tegra::Shader::TextureType::Texture3D:
178 return "3D";
179 case Tegra::Shader::TextureType::TextureCube:
180 return "CUBE";
181 }
182 UNREACHABLE();
183 return "2D";
184 }();
185 return type;
186}
187
188class ARBDecompiler final {
189public:
190 explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
191 ShaderType stage_, std::string_view identifier);
192
193 std::string Code() const {
194 return shader_source;
195 }
196
197private:
198 void DefineGlobalMemory();
199
200 void DeclareHeader();
201 void DeclareVertex();
202 void DeclareGeometry();
203 void DeclareFragment();
204 void DeclareCompute();
205 void DeclareInputAttributes();
206 void DeclareOutputAttributes();
207 void DeclareLocalMemory();
208 void DeclareGlobalMemory();
209 void DeclareConstantBuffers();
210 void DeclareRegisters();
211 void DeclareTemporaries();
212 void DeclarePredicates();
213 void DeclareInternalFlags();
214
215 void InitializeVariables();
216
217 void DecompileAST();
218 void DecompileBranchMode();
219
220 void VisitAST(const ASTNode& node);
221 std::string VisitExpression(const Expr& node);
222
223 void VisitBlock(const NodeBlock& bb);
224
225 std::string Visit(const Node& node);
226
227 std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
228 std::string BuildAoffi(Operation);
229 std::string GlobalMemoryPointer(const GmemNode& gmem);
230 void Exit();
231
232 std::string Assign(Operation);
233 std::string Select(Operation);
234 std::string FClamp(Operation);
235 std::string FCastHalf0(Operation);
236 std::string FCastHalf1(Operation);
237 std::string FSqrt(Operation);
238 std::string FSwizzleAdd(Operation);
239 std::string HAdd2(Operation);
240 std::string HMul2(Operation);
241 std::string HFma2(Operation);
242 std::string HAbsolute(Operation);
243 std::string HNegate(Operation);
244 std::string HClamp(Operation);
245 std::string HCastFloat(Operation);
246 std::string HUnpack(Operation);
247 std::string HMergeF32(Operation);
248 std::string HMergeH0(Operation);
249 std::string HMergeH1(Operation);
250 std::string HPack2(Operation);
251 std::string LogicalAssign(Operation);
252 std::string LogicalPick2(Operation);
253 std::string LogicalAnd2(Operation);
254 std::string FloatOrdered(Operation);
255 std::string FloatUnordered(Operation);
256 std::string LogicalAddCarry(Operation);
257 std::string Texture(Operation);
258 std::string TextureGather(Operation);
259 std::string TextureQueryDimensions(Operation);
260 std::string TextureQueryLod(Operation);
261 std::string TexelFetch(Operation);
262 std::string TextureGradient(Operation);
263 std::string ImageLoad(Operation);
264 std::string ImageStore(Operation);
265 std::string Branch(Operation);
266 std::string BranchIndirect(Operation);
267 std::string PushFlowStack(Operation);
268 std::string PopFlowStack(Operation);
269 std::string Exit(Operation);
270 std::string Discard(Operation);
271 std::string EmitVertex(Operation);
272 std::string EndPrimitive(Operation);
273 std::string InvocationId(Operation);
274 std::string YNegate(Operation);
275 std::string ThreadId(Operation);
276 std::string ShuffleIndexed(Operation);
277 std::string Barrier(Operation);
278 std::string MemoryBarrierGroup(Operation);
279 std::string MemoryBarrierGlobal(Operation);
280
281 template <const std::string_view& op>
282 std::string Unary(Operation operation) {
283 std::string temporary = AllocTemporary();
284 AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
285 return temporary;
286 }
287
288 template <const std::string_view& op>
289 std::string Binary(Operation operation) {
290 std::string temporary = AllocTemporary();
291 AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
292 Visit(operation[1]));
293 return temporary;
294 }
295
296 template <const std::string_view& op>
297 std::string Trinary(Operation operation) {
298 std::string temporary = AllocTemporary();
299 AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
300 Visit(operation[1]), Visit(operation[2]));
301 return temporary;
302 }
303
304 template <const std::string_view& op, bool unordered>
305 std::string FloatComparison(Operation operation) {
306 std::string temporary = AllocTemporary();
307 AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
308 AddLine("MOV.S {}, 0;", temporary);
309 AddLine("MOV.S {} (NE.x), -1;", temporary);
310
311 const std::string op_a = Visit(operation[0]);
312 const std::string op_b = Visit(operation[1]);
313 if constexpr (unordered) {
314 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
315 AddLine("TRUNC.U.CC RC.x, RC.x;");
316 AddLine("MOV.S {} (NE.x), -1;", temporary);
317 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
318 AddLine("TRUNC.U.CC RC.x, RC.x;");
319 AddLine("MOV.S {} (NE.x), -1;", temporary);
320 } else if (op == SNE_F) {
321 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
322 AddLine("TRUNC.U.CC RC.x, RC.x;");
323 AddLine("MOV.S {} (NE.x), 0;", temporary);
324 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
325 AddLine("TRUNC.U.CC RC.x, RC.x;");
326 AddLine("MOV.S {} (NE.x), 0;", temporary);
327 }
328 return temporary;
329 }
330
331 template <const std::string_view& op, bool is_nan>
332 std::string HalfComparison(Operation operation) {
333 std::string tmp1 = AllocVectorTemporary();
334 const std::string tmp2 = AllocVectorTemporary();
335 const std::string op_a = Visit(operation[0]);
336 const std::string op_b = Visit(operation[1]);
337 AddLine("UP2H.F {}, {};", tmp1, op_a);
338 AddLine("UP2H.F {}, {};", tmp2, op_b);
339 AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
340 AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
341 AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
342 AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
343 AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
344 if constexpr (is_nan) {
345 AddLine("MOVC.F RC.x, {};", op_a);
346 AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
347 AddLine("MOVC.F RC.x, {};", op_b);
348 AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
349 }
350 return tmp1;
351 }
352
353 template <const std::string_view& op, const std::string_view& type>
354 std::string AtomicImage(Operation operation) {
355 const auto& meta = std::get<MetaImage>(operation.GetMeta());
356 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
357 const std::size_t num_coords = operation.GetOperandsCount();
358 const std::size_t num_values = meta.values.size();
359
360 const std::string coord = AllocVectorTemporary();
361 const std::string value = AllocVectorTemporary();
362 for (std::size_t i = 0; i < num_coords; ++i) {
363 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
364 }
365 for (std::size_t i = 0; i < num_values; ++i) {
366 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
367 }
368
369 AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
370 image_id, ImageType(meta.image.type));
371 return fmt::format("{}.x", coord);
372 }
373
374 template <const std::string_view& op, const std::string_view& type>
375 std::string Atomic(Operation operation) {
376 std::string temporary = AllocTemporary();
377 std::string address;
378 std::string_view opname;
379 bool robust = false;
380 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
381 address = GlobalMemoryPointer(*gmem);
382 opname = "ATOM";
383 robust = true;
384 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
385 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
386 opname = "ATOMS";
387 } else {
388 UNREACHABLE();
389 return "{0, 0, 0, 0}";
390 }
391 if (robust) {
392 AddLine("IF NE.x;");
393 }
394 AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
395 if (robust) {
396 AddLine("ELSE;");
397 AddLine("MOV.S {}, 0;", temporary);
398 AddLine("ENDIF;");
399 }
400 return temporary;
401 }
402
403 template <char type>
404 std::string Negate(Operation operation) {
405 std::string temporary = AllocTemporary();
406 if constexpr (type == 'F') {
407 AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
408 } else {
409 AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
410 }
411 return temporary;
412 }
413
414 template <char type>
415 std::string Absolute(Operation operation) {
416 std::string temporary = AllocTemporary();
417 AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
418 return temporary;
419 }
420
421 template <char type>
422 std::string BitfieldInsert(Operation operation) {
423 const std::string temporary = AllocVectorTemporary();
424 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
425 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
426 AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
427 Visit(operation[0]));
428 return fmt::format("{}.x", temporary);
429 }
430
431 template <char type>
432 std::string BitfieldExtract(Operation operation) {
433 const std::string temporary = AllocVectorTemporary();
434 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
435 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
436 AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
437 return fmt::format("{}.x", temporary);
438 }
439
440 template <char swizzle>
441 std::string LocalInvocationId(Operation) {
442 return fmt::format("invocation.localid.{}", swizzle);
443 }
444
445 template <char swizzle>
446 std::string WorkGroupId(Operation) {
447 return fmt::format("invocation.groupid.{}", swizzle);
448 }
449
450 template <char c1, char c2>
451 std::string ThreadMask(Operation) {
452 return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
453 }
454
455 template <typename... Args>
456 void AddExpression(std::string_view text, Args&&... args) {
457 shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...);
458 }
459
460 template <typename... Args>
461 void AddLine(std::string_view text, Args&&... args) {
462 AddExpression(text, std::forward<Args>(args)...);
463 shader_source += '\n';
464 }
465
466 std::string AllocLongVectorTemporary() {
467 max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
468 return fmt::format("L{}", num_long_temporaries++);
469 }
470
471 std::string AllocLongTemporary() {
472 return fmt::format("{}.x", AllocLongVectorTemporary());
473 }
474
475 std::string AllocVectorTemporary() {
476 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
477 return fmt::format("T{}", num_temporaries++);
478 }
479
480 std::string AllocTemporary() {
481 return fmt::format("{}.x", AllocVectorTemporary());
482 }
483
484 void ResetTemporaries() noexcept {
485 num_temporaries = 0;
486 num_long_temporaries = 0;
487 }
488
489 const Device& device;
490 const ShaderIR& ir;
491 const Registry& registry;
492 const ShaderType stage;
493
494 std::size_t num_temporaries = 0;
495 std::size_t max_temporaries = 0;
496
497 std::size_t num_long_temporaries = 0;
498 std::size_t max_long_temporaries = 0;
499
500 std::map<GlobalMemoryBase, u32> global_memory_names;
501
502 std::string shader_source;
503
504 static constexpr std::string_view ADD_F32 = "ADD.F32";
505 static constexpr std::string_view ADD_S = "ADD.S";
506 static constexpr std::string_view ADD_U = "ADD.U";
507 static constexpr std::string_view MUL_F32 = "MUL.F32";
508 static constexpr std::string_view MUL_S = "MUL.S";
509 static constexpr std::string_view MUL_U = "MUL.U";
510 static constexpr std::string_view DIV_F32 = "DIV.F32";
511 static constexpr std::string_view DIV_S = "DIV.S";
512 static constexpr std::string_view DIV_U = "DIV.U";
513 static constexpr std::string_view MAD_F32 = "MAD.F32";
514 static constexpr std::string_view RSQ_F32 = "RSQ.F32";
515 static constexpr std::string_view COS_F32 = "COS.F32";
516 static constexpr std::string_view SIN_F32 = "SIN.F32";
517 static constexpr std::string_view EX2_F32 = "EX2.F32";
518 static constexpr std::string_view LG2_F32 = "LG2.F32";
519 static constexpr std::string_view SLT_F = "SLT.F32";
520 static constexpr std::string_view SLT_S = "SLT.S";
521 static constexpr std::string_view SLT_U = "SLT.U";
522 static constexpr std::string_view SEQ_F = "SEQ.F32";
523 static constexpr std::string_view SEQ_S = "SEQ.S";
524 static constexpr std::string_view SEQ_U = "SEQ.U";
525 static constexpr std::string_view SLE_F = "SLE.F32";
526 static constexpr std::string_view SLE_S = "SLE.S";
527 static constexpr std::string_view SLE_U = "SLE.U";
528 static constexpr std::string_view SGT_F = "SGT.F32";
529 static constexpr std::string_view SGT_S = "SGT.S";
530 static constexpr std::string_view SGT_U = "SGT.U";
531 static constexpr std::string_view SNE_F = "SNE.F32";
532 static constexpr std::string_view SNE_S = "SNE.S";
533 static constexpr std::string_view SNE_U = "SNE.U";
534 static constexpr std::string_view SGE_F = "SGE.F32";
535 static constexpr std::string_view SGE_S = "SGE.S";
536 static constexpr std::string_view SGE_U = "SGE.U";
537 static constexpr std::string_view AND_S = "AND.S";
538 static constexpr std::string_view AND_U = "AND.U";
539 static constexpr std::string_view TRUNC_F = "TRUNC.F";
540 static constexpr std::string_view TRUNC_S = "TRUNC.S";
541 static constexpr std::string_view TRUNC_U = "TRUNC.U";
542 static constexpr std::string_view SHL_S = "SHL.S";
543 static constexpr std::string_view SHL_U = "SHL.U";
544 static constexpr std::string_view SHR_S = "SHR.S";
545 static constexpr std::string_view SHR_U = "SHR.U";
546 static constexpr std::string_view OR_S = "OR.S";
547 static constexpr std::string_view OR_U = "OR.U";
548 static constexpr std::string_view XOR_S = "XOR.S";
549 static constexpr std::string_view XOR_U = "XOR.U";
550 static constexpr std::string_view NOT_S = "NOT.S";
551 static constexpr std::string_view NOT_U = "NOT.U";
552 static constexpr std::string_view BTC_S = "BTC.S";
553 static constexpr std::string_view BTC_U = "BTC.U";
554 static constexpr std::string_view BTFM_S = "BTFM.S";
555 static constexpr std::string_view BTFM_U = "BTFM.U";
556 static constexpr std::string_view ROUND_F = "ROUND.F";
557 static constexpr std::string_view CEIL_F = "CEIL.F";
558 static constexpr std::string_view FLR_F = "FLR.F";
559 static constexpr std::string_view I2F_S = "I2F.S";
560 static constexpr std::string_view I2F_U = "I2F.U";
561 static constexpr std::string_view MIN_F = "MIN.F";
562 static constexpr std::string_view MIN_S = "MIN.S";
563 static constexpr std::string_view MIN_U = "MIN.U";
564 static constexpr std::string_view MAX_F = "MAX.F";
565 static constexpr std::string_view MAX_S = "MAX.S";
566 static constexpr std::string_view MAX_U = "MAX.U";
567 static constexpr std::string_view MOV_U = "MOV.U";
568 static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
569 static constexpr std::string_view TGALL_U = "TGALL.U";
570 static constexpr std::string_view TGANY_U = "TGANY.U";
571 static constexpr std::string_view TGEQ_U = "TGEQ.U";
572 static constexpr std::string_view EXCH = "EXCH";
573 static constexpr std::string_view ADD = "ADD";
574 static constexpr std::string_view MIN = "MIN";
575 static constexpr std::string_view MAX = "MAX";
576 static constexpr std::string_view AND = "AND";
577 static constexpr std::string_view OR = "OR";
578 static constexpr std::string_view XOR = "XOR";
579 static constexpr std::string_view U32 = "U32";
580 static constexpr std::string_view S32 = "S32";
581
582 static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
583 using DecompilerType = std::string (ARBDecompiler::*)(Operation);
584 static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
585 &ARBDecompiler::Assign,
586
587 &ARBDecompiler::Select,
588
589 &ARBDecompiler::Binary<ADD_F32>,
590 &ARBDecompiler::Binary<MUL_F32>,
591 &ARBDecompiler::Binary<DIV_F32>,
592 &ARBDecompiler::Trinary<MAD_F32>,
593 &ARBDecompiler::Negate<'F'>,
594 &ARBDecompiler::Absolute<'F'>,
595 &ARBDecompiler::FClamp,
596 &ARBDecompiler::FCastHalf0,
597 &ARBDecompiler::FCastHalf1,
598 &ARBDecompiler::Binary<MIN_F>,
599 &ARBDecompiler::Binary<MAX_F>,
600 &ARBDecompiler::Unary<COS_F32>,
601 &ARBDecompiler::Unary<SIN_F32>,
602 &ARBDecompiler::Unary<EX2_F32>,
603 &ARBDecompiler::Unary<LG2_F32>,
604 &ARBDecompiler::Unary<RSQ_F32>,
605 &ARBDecompiler::FSqrt,
606 &ARBDecompiler::Unary<ROUND_F>,
607 &ARBDecompiler::Unary<FLR_F>,
608 &ARBDecompiler::Unary<CEIL_F>,
609 &ARBDecompiler::Unary<TRUNC_F>,
610 &ARBDecompiler::Unary<I2F_S>,
611 &ARBDecompiler::Unary<I2F_U>,
612 &ARBDecompiler::FSwizzleAdd,
613
614 &ARBDecompiler::Binary<ADD_S>,
615 &ARBDecompiler::Binary<MUL_S>,
616 &ARBDecompiler::Binary<DIV_S>,
617 &ARBDecompiler::Negate<'S'>,
618 &ARBDecompiler::Absolute<'S'>,
619 &ARBDecompiler::Binary<MIN_S>,
620 &ARBDecompiler::Binary<MAX_S>,
621
622 &ARBDecompiler::Unary<TRUNC_S>,
623 &ARBDecompiler::Unary<MOV_U>,
624 &ARBDecompiler::Binary<SHL_S>,
625 &ARBDecompiler::Binary<SHR_U>,
626 &ARBDecompiler::Binary<SHR_S>,
627 &ARBDecompiler::Binary<AND_S>,
628 &ARBDecompiler::Binary<OR_S>,
629 &ARBDecompiler::Binary<XOR_S>,
630 &ARBDecompiler::Unary<NOT_S>,
631 &ARBDecompiler::BitfieldInsert<'S'>,
632 &ARBDecompiler::BitfieldExtract<'S'>,
633 &ARBDecompiler::Unary<BTC_S>,
634 &ARBDecompiler::Unary<BTFM_S>,
635
636 &ARBDecompiler::Binary<ADD_U>,
637 &ARBDecompiler::Binary<MUL_U>,
638 &ARBDecompiler::Binary<DIV_U>,
639 &ARBDecompiler::Binary<MIN_U>,
640 &ARBDecompiler::Binary<MAX_U>,
641 &ARBDecompiler::Unary<TRUNC_U>,
642 &ARBDecompiler::Unary<MOV_U>,
643 &ARBDecompiler::Binary<SHL_U>,
644 &ARBDecompiler::Binary<SHR_U>,
645 &ARBDecompiler::Binary<SHR_U>,
646 &ARBDecompiler::Binary<AND_U>,
647 &ARBDecompiler::Binary<OR_U>,
648 &ARBDecompiler::Binary<XOR_U>,
649 &ARBDecompiler::Unary<NOT_U>,
650 &ARBDecompiler::BitfieldInsert<'U'>,
651 &ARBDecompiler::BitfieldExtract<'U'>,
652 &ARBDecompiler::Unary<BTC_U>,
653 &ARBDecompiler::Unary<BTFM_U>,
654
655 &ARBDecompiler::HAdd2,
656 &ARBDecompiler::HMul2,
657 &ARBDecompiler::HFma2,
658 &ARBDecompiler::HAbsolute,
659 &ARBDecompiler::HNegate,
660 &ARBDecompiler::HClamp,
661 &ARBDecompiler::HCastFloat,
662 &ARBDecompiler::HUnpack,
663 &ARBDecompiler::HMergeF32,
664 &ARBDecompiler::HMergeH0,
665 &ARBDecompiler::HMergeH1,
666 &ARBDecompiler::HPack2,
667
668 &ARBDecompiler::LogicalAssign,
669 &ARBDecompiler::Binary<AND_U>,
670 &ARBDecompiler::Binary<OR_U>,
671 &ARBDecompiler::Binary<XOR_U>,
672 &ARBDecompiler::Unary<NOT_U>,
673 &ARBDecompiler::LogicalPick2,
674 &ARBDecompiler::LogicalAnd2,
675
676 &ARBDecompiler::FloatComparison<SLT_F, false>,
677 &ARBDecompiler::FloatComparison<SEQ_F, false>,
678 &ARBDecompiler::FloatComparison<SLE_F, false>,
679 &ARBDecompiler::FloatComparison<SGT_F, false>,
680 &ARBDecompiler::FloatComparison<SNE_F, false>,
681 &ARBDecompiler::FloatComparison<SGE_F, false>,
682 &ARBDecompiler::FloatOrdered,
683 &ARBDecompiler::FloatUnordered,
684 &ARBDecompiler::FloatComparison<SLT_F, true>,
685 &ARBDecompiler::FloatComparison<SEQ_F, true>,
686 &ARBDecompiler::FloatComparison<SLE_F, true>,
687 &ARBDecompiler::FloatComparison<SGT_F, true>,
688 &ARBDecompiler::FloatComparison<SNE_F, true>,
689 &ARBDecompiler::FloatComparison<SGE_F, true>,
690
691 &ARBDecompiler::Binary<SLT_S>,
692 &ARBDecompiler::Binary<SEQ_S>,
693 &ARBDecompiler::Binary<SLE_S>,
694 &ARBDecompiler::Binary<SGT_S>,
695 &ARBDecompiler::Binary<SNE_S>,
696 &ARBDecompiler::Binary<SGE_S>,
697
698 &ARBDecompiler::Binary<SLT_U>,
699 &ARBDecompiler::Binary<SEQ_U>,
700 &ARBDecompiler::Binary<SLE_U>,
701 &ARBDecompiler::Binary<SGT_U>,
702 &ARBDecompiler::Binary<SNE_U>,
703 &ARBDecompiler::Binary<SGE_U>,
704
705 &ARBDecompiler::LogicalAddCarry,
706
707 &ARBDecompiler::HalfComparison<SLT_F, false>,
708 &ARBDecompiler::HalfComparison<SEQ_F, false>,
709 &ARBDecompiler::HalfComparison<SLE_F, false>,
710 &ARBDecompiler::HalfComparison<SGT_F, false>,
711 &ARBDecompiler::HalfComparison<SNE_F, false>,
712 &ARBDecompiler::HalfComparison<SGE_F, false>,
713 &ARBDecompiler::HalfComparison<SLT_F, true>,
714 &ARBDecompiler::HalfComparison<SEQ_F, true>,
715 &ARBDecompiler::HalfComparison<SLE_F, true>,
716 &ARBDecompiler::HalfComparison<SGT_F, true>,
717 &ARBDecompiler::HalfComparison<SNE_F, true>,
718 &ARBDecompiler::HalfComparison<SGE_F, true>,
719
720 &ARBDecompiler::Texture,
721 &ARBDecompiler::Texture,
722 &ARBDecompiler::TextureGather,
723 &ARBDecompiler::TextureQueryDimensions,
724 &ARBDecompiler::TextureQueryLod,
725 &ARBDecompiler::TexelFetch,
726 &ARBDecompiler::TextureGradient,
727
728 &ARBDecompiler::ImageLoad,
729 &ARBDecompiler::ImageStore,
730
731 &ARBDecompiler::AtomicImage<ADD, U32>,
732 &ARBDecompiler::AtomicImage<AND, U32>,
733 &ARBDecompiler::AtomicImage<OR, U32>,
734 &ARBDecompiler::AtomicImage<XOR, U32>,
735 &ARBDecompiler::AtomicImage<EXCH, U32>,
736
737 &ARBDecompiler::Atomic<EXCH, U32>,
738 &ARBDecompiler::Atomic<ADD, U32>,
739 &ARBDecompiler::Atomic<MIN, U32>,
740 &ARBDecompiler::Atomic<MAX, U32>,
741 &ARBDecompiler::Atomic<AND, U32>,
742 &ARBDecompiler::Atomic<OR, U32>,
743 &ARBDecompiler::Atomic<XOR, U32>,
744
745 &ARBDecompiler::Atomic<EXCH, S32>,
746 &ARBDecompiler::Atomic<ADD, S32>,
747 &ARBDecompiler::Atomic<MIN, S32>,
748 &ARBDecompiler::Atomic<MAX, S32>,
749 &ARBDecompiler::Atomic<AND, S32>,
750 &ARBDecompiler::Atomic<OR, S32>,
751 &ARBDecompiler::Atomic<XOR, S32>,
752
753 &ARBDecompiler::Atomic<ADD, U32>,
754 &ARBDecompiler::Atomic<MIN, U32>,
755 &ARBDecompiler::Atomic<MAX, U32>,
756 &ARBDecompiler::Atomic<AND, U32>,
757 &ARBDecompiler::Atomic<OR, U32>,
758 &ARBDecompiler::Atomic<XOR, U32>,
759
760 &ARBDecompiler::Atomic<ADD, S32>,
761 &ARBDecompiler::Atomic<MIN, S32>,
762 &ARBDecompiler::Atomic<MAX, S32>,
763 &ARBDecompiler::Atomic<AND, S32>,
764 &ARBDecompiler::Atomic<OR, S32>,
765 &ARBDecompiler::Atomic<XOR, S32>,
766
767 &ARBDecompiler::Branch,
768 &ARBDecompiler::BranchIndirect,
769 &ARBDecompiler::PushFlowStack,
770 &ARBDecompiler::PopFlowStack,
771 &ARBDecompiler::Exit,
772 &ARBDecompiler::Discard,
773
774 &ARBDecompiler::EmitVertex,
775 &ARBDecompiler::EndPrimitive,
776
777 &ARBDecompiler::InvocationId,
778 &ARBDecompiler::YNegate,
779 &ARBDecompiler::LocalInvocationId<'x'>,
780 &ARBDecompiler::LocalInvocationId<'y'>,
781 &ARBDecompiler::LocalInvocationId<'z'>,
782 &ARBDecompiler::WorkGroupId<'x'>,
783 &ARBDecompiler::WorkGroupId<'y'>,
784 &ARBDecompiler::WorkGroupId<'z'>,
785
786 &ARBDecompiler::Unary<TGBALLOT_U>,
787 &ARBDecompiler::Unary<TGALL_U>,
788 &ARBDecompiler::Unary<TGANY_U>,
789 &ARBDecompiler::Unary<TGEQ_U>,
790
791 &ARBDecompiler::ThreadId,
792 &ARBDecompiler::ThreadMask<'e', 'q'>,
793 &ARBDecompiler::ThreadMask<'g', 'e'>,
794 &ARBDecompiler::ThreadMask<'g', 't'>,
795 &ARBDecompiler::ThreadMask<'l', 'e'>,
796 &ARBDecompiler::ThreadMask<'l', 't'>,
797 &ARBDecompiler::ShuffleIndexed,
798
799 &ARBDecompiler::Barrier,
800 &ARBDecompiler::MemoryBarrierGroup,
801 &ARBDecompiler::MemoryBarrierGlobal,
802 };
803};
804
805ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
806 ShaderType stage_, std::string_view identifier)
807 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
808 DefineGlobalMemory();
809
810 AddLine("TEMP RC;");
811 AddLine("TEMP FSWZA[4];");
812 AddLine("TEMP FSWZB[4];");
813 if (ir.IsDecompiled()) {
814 DecompileAST();
815 } else {
816 DecompileBranchMode();
817 }
818 AddLine("END");
819
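    // The body is generated first so the declarations can depend on what it used (temporaries,
    // registers, predicates, flow variables). Stash it, emit the declarations, then append the
    // body back.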
820 const std::string code = std::move(shader_source);
821 DeclareHeader();
822 DeclareVertex();
823 DeclareGeometry();
824 DeclareFragment();
825 DeclareCompute();
826 DeclareInputAttributes();
827 DeclareOutputAttributes();
828 DeclareLocalMemory();
829 DeclareGlobalMemory();
830 DeclareConstantBuffers();
831 DeclareRegisters();
832 DeclareTemporaries();
833 DeclarePredicates();
834 DeclareInternalFlags();
835
836 shader_source += code;
837}
838
839std::string_view HeaderStageName(ShaderType stage) {
840 switch (stage) {
841 case ShaderType::Vertex:
842 return "vp";
843 case ShaderType::Geometry:
844 return "gp";
845 case ShaderType::Fragment:
846 return "fp";
847 case ShaderType::Compute:
848 return "cp";
849 default:
850 UNREACHABLE();
851 return "";
852 }
853}
854
855void ARBDecompiler::DefineGlobalMemory() {
856 u32 binding = 0;
857 for (const auto& pair : ir.GetGlobalMemory()) {
858 const GlobalMemoryBase base = pair.first;
859 global_memory_names.emplace(base, binding);
860 ++binding;
861 }
862}
863
864void ARBDecompiler::DeclareHeader() {
865 AddLine("!!NV{}5.0", HeaderStageName(stage));
866 // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
867 AddLine("OPTION NV_internal;");
868 AddLine("OPTION NV_gpu_program_fp64;");
869 AddLine("OPTION NV_shader_thread_group;");
870 if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
871 AddLine("OPTION NV_shader_thread_shuffle;");
872 }
873 if (stage == ShaderType::Vertex) {
874 if (device.HasNvViewportArray2()) {
875 AddLine("OPTION NV_viewport_array2;");
876 }
877 }
878 if (stage == ShaderType::Fragment) {
879 AddLine("OPTION ARB_draw_buffers;");
880 }
881 if (device.HasImageLoadFormatted()) {
882 AddLine("OPTION EXT_shader_image_load_formatted;");
883 }
884}
885
886void ARBDecompiler::DeclareVertex() {
887 if (stage != ShaderType::Vertex) {
888 return;
889 }
890 AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
891}
892
893void ARBDecompiler::DeclareGeometry() {
894 if (stage != ShaderType::Geometry) {
895 return;
896 }
897 const auto& info = registry.GetGraphicsInfo();
898 const auto& header = ir.GetHeader();
899 AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
900 AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
901 AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
902 AddLine("ATTRIB vertex_position = vertex.position;");
903}
904
905void ARBDecompiler::DeclareFragment() {
906 if (stage != ShaderType::Fragment) {
907 return;
908 }
909 AddLine("OUTPUT result_color7 = result.color[7];");
910 AddLine("OUTPUT result_color6 = result.color[6];");
911 AddLine("OUTPUT result_color5 = result.color[5];");
912 AddLine("OUTPUT result_color4 = result.color[4];");
913 AddLine("OUTPUT result_color3 = result.color[3];");
914 AddLine("OUTPUT result_color2 = result.color[2];");
915 AddLine("OUTPUT result_color1 = result.color[1];");
916 AddLine("OUTPUT result_color0 = result.color;");
917}
918
919void ARBDecompiler::DeclareCompute() {
920 if (stage != ShaderType::Compute) {
921 return;
922 }
923 const ComputeInfo& info = registry.GetComputeInfo();
924 AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
925 info.workgroup_size[2]);
926 if (info.shared_memory_size_in_words == 0) {
927 return;
928 }
929 const u32 limit = device.GetMaxComputeSharedMemorySize();
930 u32 size_in_bytes = info.shared_memory_size_in_words * 4;
931 if (size_in_bytes > limit) {
932 LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
933 size_in_bytes, limit);
934 size_in_bytes = limit;
935 }
936
937 AddLine("SHARED_MEMORY {};", size_in_bytes);
938 AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
939}
940
941void ARBDecompiler::DeclareInputAttributes() {
942 if (stage == ShaderType::Compute) {
943 return;
944 }
945 const std::string_view stage_name = StageInputName(stage);
946 for (const auto attribute : ir.GetInputAttributes()) {
947 if (!IsGenericAttribute(attribute)) {
948 continue;
949 }
950 const u32 index = GetGenericAttributeIndex(attribute);
951
952 std::string_view suffix;
953 if (stage == ShaderType::Fragment) {
954 const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
955 if (input_mode == PixelImap::Unused) {
956 return;
957 }
958 suffix = GetInputFlags(input_mode);
959 }
960 AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
961 index);
962 }
963}
964
965void ARBDecompiler::DeclareOutputAttributes() {
966 if (stage == ShaderType::Compute) {
967 return;
968 }
969 for (const auto attribute : ir.GetOutputAttributes()) {
970 if (!IsGenericAttribute(attribute)) {
971 continue;
972 }
973 const u32 index = GetGenericAttributeIndex(attribute);
974 AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
975 }
976}
977
978void ARBDecompiler::DeclareLocalMemory() {
979 u64 size = 0;
980 if (stage == ShaderType::Compute) {
981 size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
982 } else {
983 size = ir.GetHeader().GetLocalMemorySize();
984 }
985 if (size == 0) {
986 return;
987 }
988 const u64 element_count = Common::AlignUp(size, 4) / 4;
989 AddLine("TEMP lmem[{}];", element_count);
990}
991
992void ARBDecompiler::DeclareGlobalMemory() {
993 const size_t num_entries = ir.GetGlobalMemory().size();
994 if (num_entries > 0) {
995 AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
996 }
997}
998
999void ARBDecompiler::DeclareConstantBuffers() {
1000 u32 binding = 0;
1001 for (const auto& cbuf : ir.GetConstantBuffers()) {
1002 AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
1003 ++binding;
1004 }
1005}
1006
1007void ARBDecompiler::DeclareRegisters() {
1008 for (const u32 gpr : ir.GetRegisters()) {
1009 AddLine("TEMP R{};", gpr);
1010 }
1011}
1012
1013void ARBDecompiler::DeclareTemporaries() {
1014 for (std::size_t i = 0; i < max_temporaries; ++i) {
1015 AddLine("TEMP T{};", i);
1016 }
1017 for (std::size_t i = 0; i < max_long_temporaries; ++i) {
1018 AddLine("LONG TEMP L{};", i);
1019 }
1020}
1021
1022void ARBDecompiler::DeclarePredicates() {
1023 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
1024 AddLine("TEMP P{};", static_cast<u64>(pred));
1025 }
1026}
1027
1028void ARBDecompiler::DeclareInternalFlags() {
1029 for (const char* name : INTERNAL_FLAG_NAMES) {
1030 AddLine("TEMP {};", name);
1031 }
1032}
1033
1034void ARBDecompiler::InitializeVariables() {
1035 AddLine("MOV.F32 FSWZA[0], -1;");
1036 AddLine("MOV.F32 FSWZA[1], 1;");
1037 AddLine("MOV.F32 FSWZA[2], -1;");
1038 AddLine("MOV.F32 FSWZA[3], 0;");
1039 AddLine("MOV.F32 FSWZB[0], -1;");
1040 AddLine("MOV.F32 FSWZB[1], -1;");
1041 AddLine("MOV.F32 FSWZB[2], 1;");
1042 AddLine("MOV.F32 FSWZB[3], -1;");
1043
1044 if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
1045 AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
1046 }
1047 for (const auto attribute : ir.GetOutputAttributes()) {
1048 if (!IsGenericAttribute(attribute)) {
1049 continue;
1050 }
1051 const u32 index = GetGenericAttributeIndex(attribute);
1052 AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
1053 }
1054 for (const u32 gpr : ir.GetRegisters()) {
1055 AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
1056 }
1057 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
1058 AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
1059 }
1060}
1061
1062void ARBDecompiler::DecompileAST() {
1063 const u32 num_flow_variables = ir.GetASTNumVariables();
1064 for (u32 i = 0; i < num_flow_variables; ++i) {
1065 AddLine("TEMP F{};", i);
1066 }
1067 for (u32 i = 0; i < num_flow_variables; ++i) {
1068 AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
1069 }
1070
1071 InitializeVariables();
1072
1073 VisitAST(ir.GetASTProgram());
1074}
1075
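// Decompiles shaders whose control flow could not be recovered as an AST. The generated GLASM
// emulates a program counter: PC holds the address of the next basic block, and a REP loop
// compares PC against every block address, executing the matching block. Blocks that do not end
// in a branch fall through by storing the next address in PC and issuing CONT. Roughly:
//   MOV.U PC.x, <first_address>;
//   REP;
//     SEQ.S.CC RC.x, PC.x, <addr_0>; IF NE.x; <block 0> ... ELSE;
//     SEQ.S.CC RC.x, PC.x, <addr_1>; IF NE.x; <block 1> ... ELSE;
//     RET;
//   ENDIF; ... ENDREP;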
1076void ARBDecompiler::DecompileBranchMode() {
1077 static constexpr u32 FLOW_STACK_SIZE = 20;
1078 if (!ir.IsFlowStackDisabled()) {
1079 AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
1080 AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
1081 AddLine("TEMP SSY_TOP;");
1082 AddLine("TEMP PBK_TOP;");
1083 }
1084
1085 AddLine("TEMP PC;");
1086
1087 if (!ir.IsFlowStackDisabled()) {
1088 AddLine("MOV.U SSY_TOP.x, 0;");
1089 AddLine("MOV.U PBK_TOP.x, 0;");
1090 }
1091
1092 InitializeVariables();
1093
1094 const auto basic_block_end = ir.GetBasicBlocks().end();
1095 auto basic_block_it = ir.GetBasicBlocks().begin();
1096 const u32 first_address = basic_block_it->first;
1097 AddLine("MOV.U PC.x, {};", first_address);
1098
1099 AddLine("REP;");
1100
1101 std::size_t num_blocks = 0;
1102 while (basic_block_it != basic_block_end) {
1103 const auto& [address, bb] = *basic_block_it;
1104 ++num_blocks;
1105
1106 AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
1107 AddLine("IF NE.x;");
1108
1109 VisitBlock(bb);
1110
1111 ++basic_block_it;
1112
1113 if (basic_block_it != basic_block_end) {
1114 const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
1115 if (!op || op->GetCode() != OperationCode::Branch) {
1116 const u32 next_address = basic_block_it->first;
1117 AddLine("MOV.U PC.x, {};", next_address);
1118 AddLine("CONT;");
1119 }
1120 }
1121
1122 AddLine("ELSE;");
1123 }
1124 AddLine("RET;");
1125 while (num_blocks--) {
1126 AddLine("ENDIF;");
1127 }
1128
1129 AddLine("ENDREP;");
1130}
1131
1132void ARBDecompiler::VisitAST(const ASTNode& node) {
1133 if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
1134 for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
1135 VisitAST(current);
1136 }
1137 } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
1138 const std::string condition = VisitExpression(if_then->condition);
1139 ResetTemporaries();
1140
1141 AddLine("MOVC.U RC.x, {};", condition);
1142 AddLine("IF NE.x;");
1143 for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
1144 VisitAST(current);
1145 }
1146 AddLine("ENDIF;");
1147 } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
1148 AddLine("ELSE;");
1149 for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
1150 VisitAST(current);
1151 }
1152 } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
1153 VisitBlock(decoded->nodes);
1154 } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
1155 AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
1156 ResetTemporaries();
1157 } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
1158 const std::string condition = VisitExpression(do_while->condition);
1159 ResetTemporaries();
1160 AddLine("REP;");
1161 for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
1162 VisitAST(current);
1163 }
1164 AddLine("MOVC.U RC.x, {};", condition);
1165 AddLine("BRK (NE.x);");
1166 AddLine("ENDREP;");
1167 } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
1168 const bool is_true = ExprIsTrue(ast_return->condition);
1169 if (!is_true) {
1170 AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
1171 AddLine("IF NE.x;");
1172 ResetTemporaries();
1173 }
1174 if (ast_return->kills) {
1175 AddLine("KIL TR;");
1176 } else {
1177 Exit();
1178 }
1179 if (!is_true) {
1180 AddLine("ENDIF;");
1181 }
1182 } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
1183 if (ExprIsTrue(ast_break->condition)) {
1184 AddLine("BRK;");
1185 } else {
1186 AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
1187 AddLine("BRK (NE.x);");
1188 ResetTemporaries();
1189 }
1190 } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
1191 // Nothing to do
1192 } else {
1193 UNREACHABLE();
1194 }
1195}
1196
1197std::string ARBDecompiler::VisitExpression(const Expr& node) {
1198 if (const auto expr = std::get_if<ExprAnd>(&*node)) {
1199 std::string result = AllocTemporary();
1200 AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
1201 VisitExpression(expr->operand2));
1202 return result;
1203 }
1204 if (const auto expr = std::get_if<ExprOr>(&*node)) {
1205 std::string result = AllocTemporary();
1206 AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
1207 VisitExpression(expr->operand2));
1208 return result;
1209 }
1210 if (const auto expr = std::get_if<ExprNot>(&*node)) {
1211 std::string result = AllocTemporary();
1212 AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
1213 return result;
1214 }
1215 if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
1216 return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
1217 }
1218 if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
1219 return Visit(ir.GetConditionCode(expr->cc));
1220 }
1221 if (const auto expr = std::get_if<ExprVar>(&*node)) {
1222 return fmt::format("F{}.x", expr->var_index);
1223 }
1224 if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
1225 return expr->value ? "0xffffffff" : "0";
1226 }
1227 if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
1228 std::string result = AllocTemporary();
1229 AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
1230 return result;
1231 }
1232 UNREACHABLE();
1233 return "0";
1234}
1235
1236void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
1237 for (const auto& node : bb) {
1238 Visit(node);
1239 }
1240}
1241
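// Lowers a single IR node to GLASM and returns the scalar expression holding its value (or an
// empty string for statements). Operations dispatch through OPERATION_DECOMPILERS; the remaining
// cases read registers, immediates, predicates, attributes, constant buffers and memory.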
1242std::string ARBDecompiler::Visit(const Node& node) {
1243 if (const auto operation = std::get_if<OperationNode>(&*node)) {
1244 if (const auto amend_index = operation->GetAmendIndex()) {
1245 Visit(ir.GetAmendNode(*amend_index));
1246 }
1247 const std::size_t index = static_cast<std::size_t>(operation->GetCode());
1248 if (index >= OPERATION_DECOMPILERS.size()) {
1249 UNREACHABLE_MSG("Out of bounds operation: {}", index);
1250 return {};
1251 }
1252 const auto decompiler = OPERATION_DECOMPILERS[index];
1253 if (decompiler == nullptr) {
1254 UNREACHABLE_MSG("Undefined operation: {}", index);
1255 return {};
1256 }
1257 return (this->*decompiler)(*operation);
1258 }
1259
1260 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1261 const u32 index = gpr->GetIndex();
1262 if (index == Register::ZeroIndex) {
1263 return "{0, 0, 0, 0}.x";
1264 }
1265 return fmt::format("R{}.x", index);
1266 }
1267
1268 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1269 return fmt::format("CV{}.x", cv->GetIndex());
1270 }
1271
1272 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1273 std::string temporary = AllocTemporary();
1274 AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
1275 return temporary;
1276 }
1277
1278 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1279 std::string temporary = AllocTemporary();
1280 switch (const auto index = predicate->GetIndex(); index) {
1281 case Tegra::Shader::Pred::UnusedIndex:
1282 AddLine("MOV.S {}, -1;", temporary);
1283 break;
1284 case Tegra::Shader::Pred::NeverExecute:
1285 AddLine("MOV.S {}, 0;", temporary);
1286 break;
1287 default:
1288 AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
1289 break;
1290 }
1291 if (predicate->IsNegated()) {
1292 AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
1293 }
1294 return temporary;
1295 }
1296
1297 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1298 if (abuf->IsPhysicalBuffer()) {
1299 UNIMPLEMENTED_MSG("Physical buffers are not implemented");
1300 return "{0, 0, 0, 0}.x";
1301 }
1302
1303 const Attribute::Index index = abuf->GetIndex();
1304 const u32 element = abuf->GetElement();
1305 const char swizzle = Swizzle(element);
1306 switch (index) {
1307 case Attribute::Index::Position: {
1308 if (stage == ShaderType::Geometry) {
1309 return fmt::format("{}_position[{}].{}", StageInputName(stage),
1310 Visit(abuf->GetBuffer()), swizzle);
1311 } else {
1312 return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
1313 }
1314 }
1315 case Attribute::Index::TessCoordInstanceIDVertexID:
1316 ASSERT(stage == ShaderType::Vertex);
1317 switch (element) {
1318 case 2:
1319 return "vertex.instance";
1320 case 3:
1321 return "vertex.id";
1322 }
1323 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1324 break;
1325 case Attribute::Index::PointCoord:
1326 switch (element) {
1327 case 0:
1328 return "fragment.pointcoord.x";
1329 case 1:
1330 return "fragment.pointcoord.y";
1331 }
1332 UNIMPLEMENTED();
1333 break;
1334 case Attribute::Index::FrontFacing: {
1335 ASSERT(stage == ShaderType::Fragment);
1336 ASSERT(element == 3);
1337 const std::string temporary = AllocVectorTemporary();
1338 AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
1339 AddLine("MOV.U.CC RC.x, -RC;");
1340 AddLine("MOV.S {}.x, 0;", temporary);
1341 AddLine("MOV.S {}.x (NE.x), -1;", temporary);
1342 return fmt::format("{}.x", temporary);
1343 }
1344 default:
1345 if (IsGenericAttribute(index)) {
1346 if (stage == ShaderType::Geometry) {
1347 return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
1348 Visit(abuf->GetBuffer()), swizzle);
1349 } else {
1350 return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
1351 GetGenericAttributeIndex(index), swizzle);
1352 }
1353 }
1354 UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
1355 break;
1356 }
1357 return "{0, 0, 0, 0}.x";
1358 }
1359
1360 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1361 std::string offset_string;
1362 const auto& offset = cbuf->GetOffset();
1363 if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
1364 offset_string = std::to_string(imm->GetValue());
1365 } else {
1366 offset_string = Visit(offset);
1367 }
1368 std::string temporary = AllocTemporary();
1369 AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
1370 return temporary;
1371 }
1372
1373 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1374 std::string temporary = AllocTemporary();
1375 AddLine("MOV {}, 0;", temporary);
1376 AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
1377 return temporary;
1378 }
1379
1380 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1381 std::string temporary = Visit(lmem->GetAddress());
1382 AddLine("SHR.U {}, {}, 2;", temporary, temporary);
1383 AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
1384 return temporary;
1385 }
1386
1387 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1388 std::string temporary = Visit(smem->GetAddress());
1389 AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
1390 return temporary;
1391 }
1392
1393 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1394 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1395 return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1396 }
1397
1398 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1399 if (const auto amend_index = conditional->GetAmendIndex()) {
1400 Visit(ir.GetAmendNode(*amend_index));
1401 }
1402 AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
1403 AddLine("IF NE.x;");
1404 VisitBlock(conditional->GetCode());
1405 AddLine("ENDIF;");
1406 return {};
1407 }
1408
1409 if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
1410 // Uncommenting this will generate invalid code. GLASM lacks comments.
1411 // AddLine("// {}", cmt->GetText());
1412 return {};
1413 }
1414
1415 UNIMPLEMENTED();
1416 return {};
1417}
1418
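// Packs texture coordinates into a vector temporary: raw coordinates first, then the array index
// (converted to float) and the shadow comparison value, one component each. Shadow cube arrays
// need five values, so the comparison spills into a second register and both are passed as
// "coords, extra". Returns the coordinate operand, a result register, and the component count.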
1419std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
1420 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1421 UNIMPLEMENTED_IF(meta.sampler.is_indexed);
1422
1423 const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
1424 meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
1425 const std::size_t count = operation.GetOperandsCount();
1426 std::string temporary = AllocVectorTemporary();
1427 std::size_t i = 0;
1428 for (; i < count; ++i) {
1429 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1430 }
1431 if (meta.sampler.is_array) {
1432 AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
1433 ++i;
1434 }
1435 if (meta.sampler.is_shadow) {
1436 std::string compare = Visit(meta.depth_compare);
1437 if (is_extended) {
1438 ASSERT(i == 4);
1439 std::string extra_coord = AllocVectorTemporary();
1440 AddLine("MOV.F {}.x, {};", extra_coord, compare);
1441 return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
1442 }
1443 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
1444 ++i;
1445 }
1446 return {temporary, temporary, i};
1447}
1448
1449std::string ARBDecompiler::BuildAoffi(Operation operation) {
1450 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1451 if (meta.aoffi.empty()) {
1452 return {};
1453 }
1454 const std::string temporary = AllocVectorTemporary();
1455 std::size_t i = 0;
1456 for (auto& node : meta.aoffi) {
1457 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
1458 }
1459 return fmt::format(", offset({})", temporary);
1460}
1461
1462std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
1463 // Read a bindless SSBO, return its address and set CC accordingly
1464 // address = c[binding].xy
1465 // length = c[binding].z
1466 const u32 binding = global_memory_names.at(gmem.GetDescriptor());
1467
1468 const std::string pointer = AllocLongVectorTemporary();
1469 std::string temporary = AllocTemporary();
1470
1471 AddLine("PK64.U {}, c[{}];", pointer, binding);
1472 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
1473 Visit(gmem.GetBaseAddress()));
1474 AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
1475 AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
1476 // Compare offset to length and set CC
1477 AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
1478 return fmt::format("{}.x", pointer);
1479}
1480
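// Emits the shader epilogue. Fragment shaders copy output registers to result_color<N> following
// the header's color output map (one register per enabled component, consumed in order) and
// optionally write the depth output before returning; other stages simply RET.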
1481void ARBDecompiler::Exit() {
1482 if (stage != ShaderType::Fragment) {
1483 AddLine("RET;");
1484 return;
1485 }
1486
1487 const auto safe_get_register = [this](u32 reg) -> std::string {
1488 if (ir.GetRegisters().contains(reg)) {
1489 return fmt::format("R{}.x", reg);
1490 }
1491 return "{0, 0, 0, 0}.x";
1492 };
1493
1494 const auto& header = ir.GetHeader();
1495 u32 current_reg = 0;
1496 for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
1497 for (u32 component = 0; component < 4; ++component) {
1498 if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
1499 continue;
1500 }
1501 AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
1502 safe_get_register(current_reg));
1503 ++current_reg;
1504 }
1505 }
1506 if (header.ps.omap.depth) {
1507 AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
1508 }
1509
1510 AddLine("RET;");
1511}
1512
1513std::string ARBDecompiler::Assign(Operation operation) {
1514 const Node& dest = operation[0];
1515 const Node& src = operation[1];
1516
1517 std::string dest_name;
1518 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1519 if (gpr->GetIndex() == Register::ZeroIndex) {
1520 // Writing to Register::ZeroIndex is a no-op
1521 return {};
1522 }
1523 dest_name = fmt::format("R{}.x", gpr->GetIndex());
1524 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1525 const u32 element = abuf->GetElement();
1526 const char swizzle = Swizzle(element);
1527 switch (const Attribute::Index index = abuf->GetIndex()) {
1528 case Attribute::Index::Position:
1529 dest_name = fmt::format("result.position.{}", swizzle);
1530 break;
1531 case Attribute::Index::LayerViewportPointSize:
1532 switch (element) {
1533 case 0:
1534 UNIMPLEMENTED();
1535 return {};
1536 case 1:
1537 case 2:
1538 if (!device.HasNvViewportArray2()) {
1539 LOG_ERROR(
1540 Render_OpenGL,
1541 "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
1542 return {};
1543 }
1544 dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
1545 break;
1546 case 3:
1547 dest_name = "result.pointsize.x";
1548 break;
1549 }
1550 break;
1551 case Attribute::Index::ClipDistances0123:
1552 dest_name = fmt::format("result.clip[{}].x", element);
1553 break;
1554 case Attribute::Index::ClipDistances4567:
1555 dest_name = fmt::format("result.clip[{}].x", element + 4);
1556 break;
1557 default:
1558 if (!IsGenericAttribute(index)) {
1559 UNREACHABLE();
1560 return {};
1561 }
1562 dest_name =
1563 fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
1564 break;
1565 }
1566 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1567 const std::string address = Visit(lmem->GetAddress());
1568 AddLine("SHR.U {}, {}, 2;", address, address);
1569 dest_name = fmt::format("lmem[{}].x", address);
1570 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1571 AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
1572 ResetTemporaries();
1573 return {};
1574 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1575 AddLine("IF NE.x;");
1576 AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
1577 AddLine("ENDIF;");
1578 ResetTemporaries();
1579 return {};
1580 } else {
1581 UNREACHABLE();
1582 ResetTemporaries();
1583 return {};
1584 }
1585
1586 AddLine("MOV.U {}, {};", dest_name, Visit(src));
1587 ResetTemporaries();
1588 return {};
1589}
1590
1591std::string ARBDecompiler::Select(Operation operation) {
1592 std::string temporary = AllocTemporary();
1593 AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
1594 Visit(operation[2]));
1595 return temporary;
1596}
1597
1598std::string ARBDecompiler::FClamp(Operation operation) {
1599 // 1.0f in hex, replace with std::bit_cast on C++20
1600 static constexpr u32 POSITIVE_ONE = 0x3f800000;
1601
1602 std::string temporary = AllocTemporary();
1603 const Node& value = operation[0];
1604 const Node& low = operation[1];
1605 const Node& high = operation[2];
1606 const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
1607 const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
1608 if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
1609 AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
1610 } else {
1611 AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
1612 AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
1613 }
1614 return temporary;
1615}
1616
1617std::string ARBDecompiler::FCastHalf0(Operation operation) {
1618 const std::string temporary = AllocVectorTemporary();
1619 AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
1620 return fmt::format("{}.x", temporary);
1621}
1622
1623std::string ARBDecompiler::FCastHalf1(Operation operation) {
1624 const std::string temporary = AllocVectorTemporary();
1625 AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
1626 AddLine("MOV {}.x, {}.y;", temporary, temporary);
1627 return fmt::format("{}.x", temporary);
1628}
1629
1630std::string ARBDecompiler::FSqrt(Operation operation) {
1631 std::string temporary = AllocTemporary();
1632 AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
1633 AddLine("RCP.F32 {}, {};", temporary, temporary);
1634 return temporary;
1635}
1636
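// FSWZADD: adds operand 0 and operand 1 after scaling each by a per-thread factor. The low two
// bits of the thread id select a 2-bit field from operand 2, which indexes the FSWZA/FSWZB
// constant tables ({-1, 1, -1, 0} and {-1, -1, 1, -1}) set up in InitializeVariables(). Without
// NV_shader_thread_shuffle the operands are simply added.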
1637std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
1638 const std::string temporary = AllocVectorTemporary();
1639 if (!device.HasWarpIntrinsics()) {
1640 LOG_ERROR(Render_OpenGL,
1641 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
1642 AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
1643 return fmt::format("{}.x", temporary);
1644 }
1645
1646 AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
1647 AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
1648 AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
1649 AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
1650 AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
1651 AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
1652 AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
1653 return fmt::format("{}.x", temporary);
1654}
1655
1656std::string ARBDecompiler::HAdd2(Operation operation) {
1657 const std::string tmp1 = AllocVectorTemporary();
1658 const std::string tmp2 = AllocVectorTemporary();
1659 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1660 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1661 AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1662 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1663 return fmt::format("{}.x", tmp1);
1664}
1665
1666std::string ARBDecompiler::HMul2(Operation operation) {
1667 const std::string tmp1 = AllocVectorTemporary();
1668 const std::string tmp2 = AllocVectorTemporary();
1669 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1670 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1671 AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1672 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1673 return fmt::format("{}.x", tmp1);
1674}
1675
1676std::string ARBDecompiler::HFma2(Operation operation) {
1677 const std::string tmp1 = AllocVectorTemporary();
1678 const std::string tmp2 = AllocVectorTemporary();
1679 const std::string tmp3 = AllocVectorTemporary();
1680 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1681 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1682 AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
1683 AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
1684 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1685 return fmt::format("{}.x", tmp1);
1686}
1687
1688std::string ARBDecompiler::HAbsolute(Operation operation) {
1689 const std::string temporary = AllocVectorTemporary();
1690 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1691 AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
1692 return fmt::format("{}.x", temporary);
1693}
1694
1695std::string ARBDecompiler::HNegate(Operation operation) {
1696 const std::string temporary = AllocVectorTemporary();
1697 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1698 AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
1699 AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
1700 AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
1701 AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
1702 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1703 return fmt::format("{}.x", temporary);
1704}
1705
1706std::string ARBDecompiler::HClamp(Operation operation) {
1707 const std::string tmp1 = AllocVectorTemporary();
1708 const std::string tmp2 = AllocVectorTemporary();
1709 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1710 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
1711 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1712 AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
1713 AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
1714 AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
1715 AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
1716 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1717 return fmt::format("{}.x", tmp1);
1718}
1719
1720std::string ARBDecompiler::HCastFloat(Operation operation) {
1721 const std::string temporary = AllocVectorTemporary();
1722 AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
1723 AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
1724 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1725 return fmt::format("{}.x", temporary);
1726}
1727
1728std::string ARBDecompiler::HUnpack(Operation operation) {
1729 std::string operand = Visit(operation[0]);
1730 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1731 case Tegra::Shader::HalfType::H0_H1:
1732 return operand;
1733 case Tegra::Shader::HalfType::F32: {
1734 const std::string temporary = AllocVectorTemporary();
1735 AddLine("MOV.U {}.x, {};", temporary, operand);
1736 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1737 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1738 return fmt::format("{}.x", temporary);
1739 }
1740 case Tegra::Shader::HalfType::H0_H0: {
1741 const std::string temporary = AllocVectorTemporary();
1742 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1743 AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
1744 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1745 return fmt::format("{}.x", temporary);
1746 }
1747 case Tegra::Shader::HalfType::H1_H1: {
1748 const std::string temporary = AllocVectorTemporary();
1749 AddLine("UP2H.F {}.xy, {};", temporary, operand);
1750 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
1751 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1752 return fmt::format("{}.x", temporary);
1753 }
1754 }
1755 UNREACHABLE();
1756 return "{0, 0, 0, 0}.x";
1757}
1758
1759std::string ARBDecompiler::HMergeF32(Operation operation) {
1760 const std::string temporary = AllocVectorTemporary();
1761 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1762 return fmt::format("{}.x", temporary);
1763}
1764
1765std::string ARBDecompiler::HMergeH0(Operation operation) {
1766 const std::string temporary = AllocVectorTemporary();
1767 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1768 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1769 AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
1770 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1771 return fmt::format("{}.x", temporary);
1772}
1773
1774std::string ARBDecompiler::HMergeH1(Operation operation) {
1775 const std::string temporary = AllocVectorTemporary();
1776 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1777 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1778 AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
1779 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1780 return fmt::format("{}.x", temporary);
1781}
1782
1783std::string ARBDecompiler::HPack2(Operation operation) {
1784 const std::string temporary = AllocVectorTemporary();
1785 AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
1786 AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
1787 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1788 return fmt::format("{}.x", temporary);
1789}
1790
1791std::string ARBDecompiler::LogicalAssign(Operation operation) {
1792 const Node& dest = operation[0];
1793 const Node& src = operation[1];
1794
1795 std::string target;
1796
1797 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1798 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1799
1800 const Tegra::Shader::Pred index = pred->GetIndex();
1801 switch (index) {
1802 case Tegra::Shader::Pred::NeverExecute:
1803 case Tegra::Shader::Pred::UnusedIndex:
1804 // Writing to these predicates is a no-op
1805 return {};
1806 }
1807 target = fmt::format("P{}.x", static_cast<u64>(index));
1808 } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
1809 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1810 target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1811 } else {
1812 UNREACHABLE();
1813 ResetTemporaries();
1814 return {};
1815 }
1816
1817 AddLine("MOV.U {}, {};", target, Visit(src));
1818 ResetTemporaries();
1819 return {};
1820}
1821
1822std::string ARBDecompiler::LogicalPick2(Operation operation) {
1823 std::string temporary = AllocTemporary();
1824 const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
1825 AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
1826 return temporary;
1827}
1828
1829std::string ARBDecompiler::LogicalAnd2(Operation operation) {
1830 std::string temporary = AllocTemporary();
1831 const std::string op = Visit(operation[0]);
1832 AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
1833 return temporary;
1834}
1835
1836std::string ARBDecompiler::FloatOrdered(Operation operation) {
1837 std::string temporary = AllocTemporary();
1838 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1839 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1840 AddLine("MOV.S {}, -1;", temporary);
1841 AddLine("MOV.S {} (NAN.x), 0;", temporary);
1842 AddLine("MOV.S {} (NAN.y), 0;", temporary);
1843 return temporary;
1844}
1845
1846std::string ARBDecompiler::FloatUnordered(Operation operation) {
1847 std::string temporary = AllocTemporary();
1848 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1849 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1850 AddLine("MOV.S {}, 0;", temporary);
1851 AddLine("MOV.S {} (NAN.x), -1;", temporary);
1852 AddLine("MOV.S {} (NAN.y), -1;", temporary);
1853 return temporary;
1854}
1855
1856std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
1857 std::string temporary = AllocTemporary();
1858 AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
1859 AddLine("MOV.S {}, 0;", temporary);
1860 AddLine("IF CF.x;");
1861 AddLine("MOV.S {}, -1;", temporary);
1862 AddLine("ENDIF;");
1863 return temporary;
1864}
1865
1866std::string ARBDecompiler::Texture(Operation operation) {
1867 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1868 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1869 const auto [coords, temporary, swizzle] = BuildCoords(operation);
1870
1871 std::string_view opcode = "TEX";
1872 std::string extra;
1873 if (meta.bias) {
1874 ASSERT(!meta.lod);
1875 opcode = "TXB";
1876
1877 if (swizzle < 4) {
1878 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
1879 } else {
1880 const std::string bias = AllocTemporary();
1881 AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
1882 extra = fmt::format(" {},", bias);
1883 }
1884 }
1885 if (meta.lod) {
1886 ASSERT(!meta.bias);
1887 opcode = "TXL";
1888
1889 if (swizzle < 4) {
1890 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1891 } else {
1892 const std::string lod = AllocTemporary();
1893 AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
1894 extra = fmt::format(" {},", lod);
1895 }
1896 }
1897
1898 AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
1899 TextureType(meta), BuildAoffi(operation));
1900 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1901 return fmt::format("{}.x", temporary);
1902}
1903
1904std::string ARBDecompiler::TextureGather(Operation operation) {
1905 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1906 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1907 const auto [coords, temporary, swizzle] = BuildCoords(operation);
1908
1909 std::string comp;
1910 if (!meta.sampler.is_shadow) {
1911 const auto& immediate = std::get<ImmediateNode>(*meta.component);
1912 comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
1913 }
1914
1915 AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, coords, sampler_id, comp,
1916 TextureType(meta), BuildAoffi(operation));
1917 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1918 return fmt::format("{}.x", temporary);
1919}
1920
1921std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
1922 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1923 const std::string temporary = AllocVectorTemporary();
1924 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1925
1926 ASSERT(!meta.sampler.is_array);
1927
1928 const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
1929 AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
1930 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1931 return fmt::format("{}.x", temporary);
1932}
1933
1934std::string ARBDecompiler::TextureQueryLod(Operation operation) {
1935 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1936 const std::string temporary = AllocVectorTemporary();
1937 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1938
1939 ASSERT(!meta.sampler.is_array);
1940
1941 const std::size_t count = operation.GetOperandsCount();
1942 for (std::size_t i = 0; i < count; ++i) {
1943 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1944 }
1945 AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
1946 AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
1947 AddLine("TRUNC.S {}, {};", temporary, temporary);
1948 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1949 return fmt::format("{}.x", temporary);
1950}
1951
1952std::string ARBDecompiler::TexelFetch(Operation operation) {
1953 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1954 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1955 const auto [coords, temporary, swizzle] = BuildCoords(operation);
1956
1957 if (!meta.sampler.is_buffer) {
1958 ASSERT(swizzle < 4);
1959 AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
1960 }
1961 AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
1962 BuildAoffi(operation));
1963 AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
1964 return fmt::format("{}.x", temporary);
1965}
1966
1967std::string ARBDecompiler::TextureGradient(Operation operation) {
1968 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1969 const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
1970 const std::string ddx = AllocVectorTemporary();
1971 const std::string ddy = AllocVectorTemporary();
1972 const std::string coord = std::get<1>(BuildCoords(operation));
1973
1974 const std::size_t num_components = meta.derivates.size() / 2;
1975 for (std::size_t index = 0; index < num_components; ++index) {
1976 const char swizzle = Swizzle(index);
1977 AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
1978 AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
1979 }
1980
1981 const std::string_view result = coord;
1982 AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
1983 TextureType(meta), BuildAoffi(operation));
1984 AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
1985 return fmt::format("{}.x", result);
1986}
1987
1988std::string ARBDecompiler::ImageLoad(Operation operation) {
1989 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1990 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
1991 const std::size_t count = operation.GetOperandsCount();
1992 const std::string_view type = ImageType(meta.image.type);
1993
1994 const std::string temporary = AllocVectorTemporary();
1995 for (std::size_t i = 0; i < count; ++i) {
1996 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1997 }
1998 AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
1999 AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
2000 return fmt::format("{}.x", temporary);
2001}
2002
2003std::string ARBDecompiler::ImageStore(Operation operation) {
2004 const auto& meta = std::get<MetaImage>(operation.GetMeta());
2005 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
2006 const std::size_t num_coords = operation.GetOperandsCount();
2007 const std::size_t num_values = meta.values.size();
2008 const std::string_view type = ImageType(meta.image.type);
2009
2010 const std::string coord = AllocVectorTemporary();
2011 const std::string value = AllocVectorTemporary();
2012 for (std::size_t i = 0; i < num_coords; ++i) {
2013 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
2014 }
2015 for (std::size_t i = 0; i < num_values; ++i) {
2016 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
2017 }
2018 AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
2019 return {};
2020}
2021
2022std::string ARBDecompiler::Branch(Operation operation) {
2023 const auto target = std::get<ImmediateNode>(*operation[0]);
2024 AddLine("MOV.U PC.x, {};", target.GetValue());
2025 AddLine("CONT;");
2026 return {};
2027}
2028
2029std::string ARBDecompiler::BranchIndirect(Operation operation) {
2030 AddLine("MOV.U PC.x, {};", Visit(operation[0]));
2031 AddLine("CONT;");
2032 return {};
2033}
2034
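// SSY/PBK flow stacks are emulated with the SSY/PBK arrays declared in DecompileBranchMode():
// pushing stores the target address at the current *_TOP index and increments it; popping
// decrements *_TOP, loads the saved address into PC and continues the dispatch loop.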
2035std::string ARBDecompiler::PushFlowStack(Operation operation) {
2036 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2037 const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
2038 const std::string_view stack_name = StackName(stack);
2039 AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
2040 AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
2041 return {};
2042}
2043
2044std::string ARBDecompiler::PopFlowStack(Operation operation) {
2045 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2046 const std::string_view stack_name = StackName(stack);
2047 AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
2048 AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
2049 AddLine("CONT;");
2050 return {};
2051}
2052
2053std::string ARBDecompiler::Exit(Operation) {
2054 Exit();
2055 return {};
2056}
2057
2058std::string ARBDecompiler::Discard(Operation) {
2059 AddLine("KIL TR;");
2060 return {};
2061}
2062
2063std::string ARBDecompiler::EmitVertex(Operation) {
2064 AddLine("EMIT;");
2065 return {};
2066}
2067
2068std::string ARBDecompiler::EndPrimitive(Operation) {
2069 AddLine("ENDPRIM;");
2070 return {};
2071}
2072
2073std::string ARBDecompiler::InvocationId(Operation) {
2074 return "primitive.invocation";
2075}
2076
2077std::string ARBDecompiler::YNegate(Operation) {
2078 LOG_WARNING(Render_OpenGL, "(STUBBED)");
2079 std::string temporary = AllocTemporary();
2080 AddLine("MOV.F {}, 1;", temporary);
2081 return temporary;
2082}
2083
2084std::string ARBDecompiler::ThreadId(Operation) {
2085 return fmt::format("{}.threadid", StageInputName(stage));
2086}
2087
2088std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
2089 if (!device.HasWarpIntrinsics()) {
2090 LOG_ERROR(Render_OpenGL,
2091 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
2092 return Visit(operation[0]);
2093 }
2094 const std::string temporary = AllocVectorTemporary();
2095 AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
2096 Visit(operation[1]));
2097 AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
2098 return fmt::format("{}.x", temporary);
2099}
2100
2101std::string ARBDecompiler::Barrier(Operation) {
2102 AddLine("BAR;");
2103 return {};
2104}
2105
2106std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
2107 AddLine("MEMBAR.CTA;");
2108 return {};
2109}
2110
2111std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
2112 AddLine("MEMBAR;");
2113 return {};
2114}
2115
2116} // Anonymous namespace
2117
2118std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
2119 const VideoCommon::Shader::Registry& registry,
2120 Tegra::Engines::ShaderType stage, std::string_view identifier) {
2121 return ARBDecompiler(device, ir, registry, stage, identifier).Code();
2122}
2123
2124} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
deleted file mode 100644
index 6afc87220..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ /dev/null
@@ -1,29 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <string_view>
9
10#include "common/common_types.h"
11
12namespace Tegra::Engines {
13enum class ShaderType : u32;
14}
15
16namespace VideoCommon::Shader {
17class ShaderIR;
18class Registry;
19} // namespace VideoCommon::Shader
20
21namespace OpenGL {
22
23class Device;
24
25std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
26 const VideoCommon::Shader::Registry& registry,
27 Tegra::Engines::ShaderType stage, std::string_view identifier);
28
29} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index a02a45e04..07a995f7d 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,14 +2,18 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <span>
 
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
 
 namespace OpenGL {
 namespace {
+using VideoCore::Surface::PixelFormat;
+
 struct BindlessSSBO {
     GLuint64EXT address;
     GLsizei length;
@@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{
     GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
     GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
 };
+
+[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
+    switch (gl_format) {
+    case GL_RGBA8_SNORM:
+        return GL_RGBA8;
+    case GL_R8_SNORM:
+        return GL_R8;
+    case GL_RGBA16_SNORM:
+        return GL_RGBA16;
+    case GL_R16_SNORM:
+        return GL_R16;
+    case GL_RG16_SNORM:
+        return GL_RG16;
+    case GL_RG8_SNORM:
+        return GL_RG8;
+    default:
+        return gl_format;
+    }
+}
 } // Anonymous namespace
 
 Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
@@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept {
     glMakeNamedBufferResidentNV(buffer.handle, access);
 }
 
+GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
+    const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
+        return offset == view.offset && size == view.size && format == view.format;
+    })};
+    if (it != views.end()) {
+        return it->texture.handle;
+    }
+    OGLTexture texture;
+    texture.Create(GL_TEXTURE_BUFFER);
+    const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
+    const GLenum texture_format{GetTextureBufferFormat(gl_format)};
+    if (texture_format != gl_format) {
+        LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
+    }
+    glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
+    views.push_back({
+        .offset = offset,
+        .size = size,
+        .format = format,
+        .texture = std::move(texture),
+    });
+    return views.back().texture.handle;
+}
+
 BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
     : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
       use_assembly_shaders{device.UseAssemblyShaders()},
@@ -144,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
         glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
                             static_cast<GLsizeiptr>(size));
     } else {
-        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+        const GLuint base_binding = graphics_base_uniform_bindings[stage];
         const GLuint binding = base_binding + binding_index;
         glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -171,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
 
 void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
                                            u32 offset, u32 size, bool is_written) {
-    if (use_assembly_shaders) {
+    if (use_storage_buffers) {
+        const GLuint base_binding = graphics_base_storage_bindings[stage];
+        const GLuint binding = base_binding + binding_index;
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    } else {
         const BindlessSSBO ssbo{
             .address = buffer.HostGpuAddr() + offset,
             .length = static_cast<GLsizei>(size),
@@ -180,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
         buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
         glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
                                         reinterpret_cast<const GLuint*>(&ssbo));
-    } else {
-        const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
-        const GLuint binding = base_binding + binding_index;
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
-                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
     }
 }
 
 void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
                                                   u32 size, bool is_written) {
-    if (use_assembly_shaders) {
+    if (use_storage_buffers) {
+        if (size != 0) {
+            glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
243 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
244 } else {
245 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
246 }
247 } else {
194 const BindlessSSBO ssbo{ 248 const BindlessSSBO ssbo{
195 .address = buffer.HostGpuAddr() + offset, 249 .address = buffer.HostGpuAddr() + offset,
196 .length = static_cast<GLsizei>(size), 250 .length = static_cast<GLsizei>(size),
@@ -199,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
199 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); 253 buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
200 glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, 254 glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
201 reinterpret_cast<const GLuint*>(&ssbo)); 255 reinterpret_cast<const GLuint*>(&ssbo));
202 } else if (size == 0) {
203 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
204 } else {
205 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
206 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
207 } 256 }
208} 257}
209 258
@@ -213,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
213 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 262 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
214} 263}
215 264
265void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
266 PixelFormat format) {
267 *texture_handles++ = buffer.View(offset, size, format);
268}
269
270void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
271 *image_handles++ = buffer.View(offset, size, format);
272}
273
216} // namespace OpenGL 274} // namespace OpenGL
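Note: Buffer::View above creates at most one GL_TEXTURE_BUFFER view per (offset, size, format) triple, reusing it on later lookups, and falls back from SNORM to the matching UNORM internal format with a warning. Below is a minimal, self-contained sketch of that find-or-create pattern; the GL object creation is replaced by a plain counter, and everything outside the BufferView fields is illustrative rather than taken from the diff.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

enum class PixelFormat : std::uint32_t { A8B8G8R8_SNORM, R32_FLOAT };

struct BufferView {
    std::uint32_t offset;
    std::uint32_t size;
    PixelFormat format;
    std::uint32_t texture_handle; // stands in for the OGLTexture member
};

class Buffer {
public:
    // Returns a cached view handle, creating one on first use.
    std::uint32_t View(std::uint32_t offset, std::uint32_t size, PixelFormat format) {
        const auto it = std::ranges::find_if(views, [&](const BufferView& view) {
            return view.offset == offset && view.size == size && view.format == format;
        });
        if (it != views.end()) {
            return it->texture_handle; // cache hit, no new GL object
        }
        // The real implementation creates a GL_TEXTURE_BUFFER here and calls
        // glTextureBufferRange, mapping SNORM formats to UNORM first.
        const std::uint32_t handle = next_handle++;
        views.push_back({offset, size, format, handle});
        return handle;
    }

private:
    std::vector<BufferView> views;
    std::uint32_t next_handle = 1;
};

int main() {
    Buffer buffer;
    const auto a = buffer.View(0, 256, PixelFormat::R32_FLOAT);
    const auto b = buffer.View(0, 256, PixelFormat::R32_FLOAT); // reuses the first view
    const auto c = buffer.View(256, 64, PixelFormat::A8B8G8R8_SNORM);
    std::cout << a << ' ' << b << ' ' << c << '\n'; // prints "1 1 2"
}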
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index fe91aa452..060d36427 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -32,6 +32,8 @@ public:
32 32
33 void MakeResident(GLenum access) noexcept; 33 void MakeResident(GLenum access) noexcept;
34 34
35 [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
36
35 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { 37 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
36 return address; 38 return address;
37 } 39 }
@@ -41,9 +43,17 @@ public:
41 } 43 }
42 44
43private: 45private:
46 struct BufferView {
47 u32 offset;
48 u32 size;
49 VideoCore::Surface::PixelFormat format;
50 OGLTexture texture;
51 };
52
44 GLuint64EXT address = 0; 53 GLuint64EXT address = 0;
45 OGLBuffer buffer; 54 OGLBuffer buffer;
46 GLenum current_residency_access = GL_NONE; 55 GLenum current_residency_access = GL_NONE;
56 std::vector<BufferView> views;
47}; 57};
48 58
49class BufferCacheRuntime { 59class BufferCacheRuntime {
@@ -75,17 +85,21 @@ public:
75 85
76 void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); 86 void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
77 87
88 void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
89 VideoCore::Surface::PixelFormat format);
90
91 void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
92 VideoCore::Surface::PixelFormat format);
93
78 void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { 94 void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
95 const GLuint handle = fast_uniforms[stage][binding_index].handle;
96 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
79 if (use_assembly_shaders) { 97 if (use_assembly_shaders) {
80 const GLuint handle = fast_uniforms[stage][binding_index].handle;
81 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
82 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); 98 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
83 } else { 99 } else {
84 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; 100 const GLuint base_binding = graphics_base_uniform_bindings[stage];
85 const GLuint binding = base_binding + binding_index; 101 const GLuint binding = base_binding + binding_index;
86 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 102 glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
87 fast_uniforms[stage][binding_index].handle, 0,
88 static_cast<GLsizeiptr>(size));
89 } 103 }
90 } 104 }
91 105
@@ -103,7 +117,7 @@ public:
103 117
104 std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { 118 std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
105 const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); 119 const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
106 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; 120 const GLuint base_binding = graphics_base_uniform_bindings[stage];
107 const GLuint binding = base_binding + binding_index; 121 const GLuint binding = base_binding + binding_index;
108 glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), 122 glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
109 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 123 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -118,6 +132,27 @@ public:
118 return has_fast_buffer_sub_data; 132 return has_fast_buffer_sub_data;
119 } 133 }
120 134
135 [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
136 return !use_assembly_shaders;
137 }
138
139 void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
140 graphics_base_uniform_bindings = bindings;
141 }
142
143 void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
144 graphics_base_storage_bindings = bindings;
145 }
146
147 void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
148 texture_handles = texture_handles_;
149 image_handles = image_handles_;
150 }
151
152 void SetEnableStorageBuffers(bool use_storage_buffers_) {
153 use_storage_buffers = use_storage_buffers_;
154 }
155
121private: 156private:
122 static constexpr std::array PABO_LUT{ 157 static constexpr std::array PABO_LUT{
123 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 158 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -131,8 +166,15 @@ private:
131 bool use_assembly_shaders = false; 166 bool use_assembly_shaders = false;
132 bool has_unified_vertex_buffers = false; 167 bool has_unified_vertex_buffers = false;
133 168
169 bool use_storage_buffers = false;
170
134 u32 max_attributes = 0; 171 u32 max_attributes = 0;
135 172
173 std::array<GLuint, 5> graphics_base_uniform_bindings{};
174 std::array<GLuint, 5> graphics_base_storage_bindings{};
175 GLuint* texture_handles = nullptr;
176 GLuint* image_handles = nullptr;
177
136 std::optional<StreamBuffer> stream_buffer; 178 std::optional<StreamBuffer> stream_buffer;
137 179
138 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, 180 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@@ -156,6 +198,7 @@ struct BufferCacheParams {
156 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; 198 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
157 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; 199 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
158 static constexpr bool USE_MEMORY_MAPS = false; 200 static constexpr bool USE_MEMORY_MAPS = false;
201 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
159}; 202};
160 203
161using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 204using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
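Note: with Device::BaseBindings gone, pipelines now hand BufferCacheRuntime its per-stage base uniform and storage bindings (plus texture/image handle pointers) through the setters above before binding anything. A standalone illustration of producing such base bindings as an exclusive prefix sum over per-stage buffer counts follows; the counts are hypothetical, and the real accumulation lives in the pipeline constructors later in this diff.

#include <array>
#include <cstddef>
#include <cstdint>
#include <iostream>

int main() {
    // Hypothetical constant-buffer counts for the vertex..fragment stages.
    const std::array<std::uint32_t, 5> num_uniform_buffers{4, 0, 0, 2, 6};

    // Exclusive prefix sum: each stage starts where the previous one ended.
    std::array<std::uint32_t, 5> base_uniform_bindings{};
    for (std::size_t stage = 0; stage + 1 < base_uniform_bindings.size(); ++stage) {
        base_uniform_bindings[stage + 1] =
            base_uniform_bindings[stage] + num_uniform_buffers[stage];
    }
    // Binding N of a stage then maps to GL binding base_uniform_bindings[stage] + N,
    // which is what SetBaseUniformBindings() makes available to the runtime.
    for (const std::uint32_t base : base_uniform_bindings) {
        std::cout << base << ' ';
    }
    std::cout << '\n'; // prints "0 4 4 4 6"
}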
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
new file mode 100644
index 000000000..aa1cc592f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -0,0 +1,209 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6
7#include "common/cityhash.h"
8#include "common/settings.h" // for enum class Settings::ShaderBackend
9#include "video_core/renderer_opengl/gl_compute_pipeline.h"
10#include "video_core/renderer_opengl/gl_shader_manager.h"
11#include "video_core/renderer_opengl/gl_shader_util.h"
12
13namespace OpenGL {
14
15using Shader::ImageBufferDescriptor;
16using Tegra::Texture::TexturePair;
17using VideoCommon::ImageId;
18
19constexpr u32 MAX_TEXTURES = 64;
20constexpr u32 MAX_IMAGES = 16;
21
22template <typename Range>
23u32 AccumulateCount(const Range& range) {
24 u32 num{};
25 for (const auto& desc : range) {
26 num += desc.count;
27 }
28 return num;
29}
30
31size_t ComputePipelineKey::Hash() const noexcept {
32 return static_cast<size_t>(
33 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
34}
35
36bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept {
37 return std::memcmp(this, &rhs, sizeof *this) == 0;
38}
39
40ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
41 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
42 Tegra::Engines::KeplerCompute& kepler_compute_,
43 ProgramManager& program_manager_, const Shader::Info& info_,
44 std::string code, std::vector<u32> code_v)
45 : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
46 kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
47 switch (device.GetShaderBackend()) {
48 case Settings::ShaderBackend::GLSL:
49 source_program = CreateProgram(code, GL_COMPUTE_SHADER);
50 break;
51 case Settings::ShaderBackend::GLASM:
52 assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
53 break;
54 case Settings::ShaderBackend::SPIRV:
55 source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
56 break;
57 }
58 std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
59 uniform_buffer_sizes.begin());
60
61 num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
62 num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
63
64 const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
65 ASSERT(num_textures <= MAX_TEXTURES);
66
67 const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
68 ASSERT(num_images <= MAX_IMAGES);
69
70 const bool is_glasm{assembly_program.handle != 0};
71 const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
72 use_storage_buffers =
73 !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
74 writes_global_memory = !use_storage_buffers &&
75 std::ranges::any_of(info.storage_buffers_descriptors,
76 [](const auto& desc) { return desc.is_written; });
77}
78
79void ComputePipeline::Configure() {
80 buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
81 buffer_cache.UnbindComputeStorageBuffers();
82 size_t ssbo_index{};
83 for (const auto& desc : info.storage_buffers_descriptors) {
84 ASSERT(desc.count == 1);
85 buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
86 desc.is_written);
87 ++ssbo_index;
88 }
89 texture_cache.SynchronizeComputeDescriptors();
90
91 std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
92 boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
93 std::array<GLuint, MAX_TEXTURES> samplers;
94 std::array<GLuint, MAX_TEXTURES> textures;
95 std::array<GLuint, MAX_IMAGES> images;
96 GLsizei sampler_binding{};
97 GLsizei texture_binding{};
98 GLsizei image_binding{};
99
100 const auto& qmd{kepler_compute.launch_description};
101 const auto& cbufs{qmd.const_buffer_config};
102 const bool via_header_index{qmd.linked_tsc != 0};
103 const auto read_handle{[&](const auto& desc, u32 index) {
104 ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
105 const u32 index_offset{index << desc.size_shift};
106 const u32 offset{desc.cbuf_offset + index_offset};
107 const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
108 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
109 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
110 if (desc.has_secondary) {
111 ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
112 const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
113 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
114 secondary_offset};
115 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
116 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
117 return TexturePair(lhs_raw | rhs_raw, via_header_index);
118 }
119 }
120 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
121 }};
122 const auto add_image{[&](const auto& desc) {
123 for (u32 index = 0; index < desc.count; ++index) {
124 const auto handle{read_handle(desc, index)};
125 image_view_indices.push_back(handle.first);
126 }
127 }};
128 for (const auto& desc : info.texture_buffer_descriptors) {
129 for (u32 index = 0; index < desc.count; ++index) {
130 const auto handle{read_handle(desc, index)};
131 image_view_indices.push_back(handle.first);
132 samplers[sampler_binding++] = 0;
133 }
134 }
135 std::ranges::for_each(info.image_buffer_descriptors, add_image);
136 for (const auto& desc : info.texture_descriptors) {
137 for (u32 index = 0; index < desc.count; ++index) {
138 const auto handle{read_handle(desc, index)};
139 image_view_indices.push_back(handle.first);
140
141 Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
142 samplers[sampler_binding++] = sampler->Handle();
143 }
144 }
145 std::ranges::for_each(info.image_descriptors, add_image);
146
147 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
148 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
149
150 if (assembly_program.handle != 0) {
151 program_manager.BindComputeAssemblyProgram(assembly_program.handle);
152 } else {
153 program_manager.BindComputeProgram(source_program.handle);
154 }
155 buffer_cache.UnbindComputeTextureBuffers();
156 size_t texbuf_index{};
157 const auto add_buffer{[&](const auto& desc) {
158 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
159 for (u32 i = 0; i < desc.count; ++i) {
160 bool is_written{false};
161 if constexpr (is_image) {
162 is_written = desc.is_written;
163 }
164 ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
165 buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
166 image_view.BufferSize(), image_view.format,
167 is_written, is_image);
168 ++texbuf_index;
169 }
170 }};
171 std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
172 std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
173
174 buffer_cache.UpdateComputeBuffers();
175
176 buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
177 buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
178 buffer_cache.BindHostComputeBuffers();
179
180 const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
181 texture_binding += num_texture_buffers;
182 image_binding += num_image_buffers;
183
184 for (const auto& desc : info.texture_descriptors) {
185 for (u32 index = 0; index < desc.count; ++index) {
186 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
187 textures[texture_binding++] = image_view.Handle(desc.type);
188 }
189 }
190 for (const auto& desc : info.image_descriptors) {
191 for (u32 index = 0; index < desc.count; ++index) {
192 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
193 if (desc.is_written) {
194 texture_cache.MarkModification(image_view.image_id);
195 }
196 images[image_binding++] = image_view.StorageView(desc.type, desc.format);
197 }
198 }
199 if (texture_binding != 0) {
200 ASSERT(texture_binding == sampler_binding);
201 glBindTextures(0, texture_binding, textures.data());
202 glBindSamplers(0, sampler_binding, samplers.data());
203 }
204 if (image_binding != 0) {
205 glBindImageTextures(0, image_binding, images.data());
206 }
207}
208
209} // namespace OpenGL
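Note: the read_handle lambda above fetches a 32-bit texture handle word from the bound const buffer and, when the descriptor has a secondary const buffer, ORs the two words together before TexturePair splits the result into texture and sampler indices. A self-contained sketch of that combining step is below; the 20/12-bit split is an assumption for illustration, and the actual unpacking is done by Tegra::Texture::TexturePair.

#include <cstdint>
#include <iostream>
#include <utility>

// Assumed layout: low 20 bits texture index, high 12 bits sampler index.
std::pair<std::uint32_t, std::uint32_t> UnpackHandle(std::uint32_t raw) {
    return {raw & 0xfffff, raw >> 20};
}

int main() {
    // With has_secondary set, each const buffer carries part of the word and
    // the halves are combined with a bitwise OR, as in the lambda above.
    const std::uint32_t primary_word = 0x0000'1234;   // texture bits only
    const std::uint32_t secondary_word = 0x0570'0000; // sampler bits only
    const std::uint32_t raw = primary_word | secondary_word;

    const auto [texture_index, sampler_index] = UnpackHandle(raw);
    std::cout << std::hex << texture_index << ' ' << sampler_index << '\n'; // prints "1234 57"
}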
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
new file mode 100644
index 000000000..50c676365
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -0,0 +1,93 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9#include <utility>
10
11#include "common/common_types.h"
12#include "shader_recompiler/shader_info.h"
13#include "video_core/renderer_opengl/gl_buffer_cache.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_texture_cache.h"
16
17namespace Tegra {
18class MemoryManager;
19}
20
21namespace Tegra::Engines {
22class KeplerCompute;
23}
24
25namespace Shader {
26struct Info;
27}
28
29namespace OpenGL {
30
31class Device;
32class ProgramManager;
33
34struct ComputePipelineKey {
35 u64 unique_hash;
36 u32 shared_memory_size;
37 std::array<u32, 3> workgroup_size;
38
39 size_t Hash() const noexcept;
40
41 bool operator==(const ComputePipelineKey&) const noexcept;
42
43 bool operator!=(const ComputePipelineKey& rhs) const noexcept {
44 return !operator==(rhs);
45 }
46};
47static_assert(std::has_unique_object_representations_v<ComputePipelineKey>);
48static_assert(std::is_trivially_copyable_v<ComputePipelineKey>);
49static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
50
51class ComputePipeline {
52public:
53 explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
54 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
55 Tegra::Engines::KeplerCompute& kepler_compute_,
56 ProgramManager& program_manager_, const Shader::Info& info_,
57 std::string code, std::vector<u32> code_v);
58
59 void Configure();
60
61 [[nodiscard]] bool WritesGlobalMemory() const noexcept {
62 return writes_global_memory;
63 }
64
65private:
66 TextureCache& texture_cache;
67 BufferCache& buffer_cache;
68 Tegra::MemoryManager& gpu_memory;
69 Tegra::Engines::KeplerCompute& kepler_compute;
70 ProgramManager& program_manager;
71
72 Shader::Info info;
73 OGLProgram source_program;
74 OGLAssemblyProgram assembly_program;
75 VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
76
77 u32 num_texture_buffers{};
78 u32 num_image_buffers{};
79
80 bool use_storage_buffers{};
81 bool writes_global_memory{};
82};
83
84} // namespace OpenGL
85
86namespace std {
87template <>
88struct hash<OpenGL::ComputePipelineKey> {
89 size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept {
90 return k.Hash();
91 }
92};
93} // namespace std
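Note: ComputePipelineKey is hashed and compared as raw bytes; that is only sound because the static_asserts above guarantee a trivially copyable layout with no padding, so equal keys have identical object representations. A dependency-free sketch of the same pattern, with std::hash over the raw bytes standing in for Common::CityHash64:

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <string_view>
#include <type_traits>

struct Key {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::array<std::uint32_t, 3> workgroup_size;

    std::size_t Hash() const noexcept {
        // Hash the object's bytes directly; any padding would make this unstable.
        return std::hash<std::string_view>{}(
            std::string_view(reinterpret_cast<const char*>(this), sizeof *this));
    }
    bool operator==(const Key& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
// Without unique object representations, padding bytes could differ between
// otherwise equal keys and break both Hash() and operator==.
static_assert(std::has_unique_object_representations_v<Key>);
static_assert(std::is_trivially_copyable_v<Key>);

int main() {
    const Key a{1, 0x8000, {8, 8, 1}};
    const Key b{1, 0x8000, {8, 8, 1}};
    std::cout << (a == b) << ' ' << (a.Hash() == b.Hash()) << '\n'; // prints "1 1"
}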
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3b00614e7..9692b8e94 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -17,39 +17,17 @@
17#include "common/logging/log.h" 17#include "common/logging/log.h"
18#include "common/scope_exit.h" 18#include "common/scope_exit.h"
19#include "common/settings.h" 19#include "common/settings.h"
20#include "shader_recompiler/stage.h"
20#include "video_core/renderer_opengl/gl_device.h" 21#include "video_core/renderer_opengl/gl_device.h"
21#include "video_core/renderer_opengl/gl_resource_manager.h" 22#include "video_core/renderer_opengl/gl_resource_manager.h"
22 23
23namespace OpenGL { 24namespace OpenGL {
24namespace { 25namespace {
25// One uniform block is reserved for emulation purposes
26constexpr u32 ReservedUniformBlocks = 1;
27
28constexpr u32 NumStages = 5;
29
30constexpr std::array LIMIT_UBOS = { 26constexpr std::array LIMIT_UBOS = {
31 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, 27 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
32 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, 28 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
33 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, 29 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
34}; 30};
35constexpr std::array LIMIT_SSBOS = {
36 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
37 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
38 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
39};
40constexpr std::array LIMIT_SAMPLERS = {
41 GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
42 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
43 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
44 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
45 GL_MAX_TEXTURE_IMAGE_UNITS,
46 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
47};
48constexpr std::array LIMIT_IMAGES = {
49 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
50 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
51 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
52};
53 31
54template <typename T> 32template <typename T>
55T GetInteger(GLenum pname) { 33T GetInteger(GLenum pname) {
@@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
82 return std::ranges::find(extensions, extension) != extensions.end(); 60 return std::ranges::find(extensions, extension) != extensions.end();
83} 61}
84 62
85u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { 63std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept {
86 ASSERT(num >= amount); 64 std::array<u32, Shader::MaxStageTypes> max;
87 if (limit) { 65 std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>);
88 amount = std::min(amount, GetInteger<u32>(*limit));
89 }
90 num -= amount;
91 return std::exchange(base, base + amount);
92}
93
94std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
95 std::array<u32, Tegra::Engines::MaxShaderTypes> max;
96 std::ranges::transform(LIMIT_UBOS, max.begin(),
97 [](GLenum pname) { return GetInteger<u32>(pname); });
98 return max; 66 return max;
99} 67}
100 68
101std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
102 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
103
104 static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
105 const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
106 const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
107 const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
108
109 u32 num_ubos = total_ubos - ReservedUniformBlocks;
110 u32 num_ssbos = total_ssbos;
111 u32 num_samplers = total_samplers;
112
113 u32 base_ubo = ReservedUniformBlocks;
114 u32 base_ssbo = 0;
115 u32 base_samplers = 0;
116
117 for (std::size_t i = 0; i < NumStages; ++i) {
118 const std::size_t stage = stage_swizzle[i];
119 bindings[stage] = {
120 Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
121 Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
122 Extract(base_samplers, num_samplers, total_samplers / NumStages,
123 LIMIT_SAMPLERS[stage])};
124 }
125
126 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
127 u32 base_images = 0;
128
129 // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
130 // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
131 // fragment stage, and at least 1 for the rest of the stages.
132 // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
133
134 // Reserve at least 4 image bindings on the fragment stage.
135 bindings[4].image =
136 Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
137
138 // This is guaranteed to be at least 1.
139 const u32 total_extracted_images = num_images / (NumStages - 1);
140
141 // Reserve the other image bindings.
142 for (std::size_t i = 0; i < NumStages; ++i) {
143 const std::size_t stage = stage_swizzle[i];
144 if (stage == 4) {
145 continue;
146 }
147 bindings[stage].image =
148 Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
149 }
150
151 // Compute doesn't care about any of this.
152 bindings[5] = {0, 0, 0, 0};
153
154 return bindings;
155}
156
157bool IsASTCSupported() { 69bool IsASTCSupported() {
158 static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; 70 static constexpr std::array targets{
159 static constexpr std::array formats = { 71 GL_TEXTURE_2D,
72 GL_TEXTURE_2D_ARRAY,
73 };
74 static constexpr std::array formats{
160 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, 75 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
161 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, 76 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
162 GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, 77 GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
@@ -172,11 +87,10 @@ bool IsASTCSupported() {
172 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, 87 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
173 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, 88 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
174 }; 89 };
175 static constexpr std::array required_support = { 90 static constexpr std::array required_support{
176 GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, 91 GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
177 GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, 92 GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
178 }; 93 };
179
180 for (const GLenum target : targets) { 94 for (const GLenum target : targets) {
181 for (const GLenum format : formats) { 95 for (const GLenum format : formats) {
182 for (const GLenum support : required_support) { 96 for (const GLenum support : required_support) {
@@ -223,14 +137,13 @@ Device::Device() {
223 "Beta driver 443.24 is known to have issues. There might be performance issues."); 137 "Beta driver 443.24 is known to have issues. There might be performance issues.");
224 disable_fast_buffer_sub_data = true; 138 disable_fast_buffer_sub_data = true;
225 } 139 }
226
227 max_uniform_buffers = BuildMaxUniformBuffers(); 140 max_uniform_buffers = BuildMaxUniformBuffers();
228 base_bindings = BuildBaseBindings();
229 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 141 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
230 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 142 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
231 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 143 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
232 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 144 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
233 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); 145 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
146 max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
234 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && 147 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
235 GLAD_GL_NV_shader_thread_shuffle; 148 GLAD_GL_NV_shader_thread_shuffle;
236 has_shader_ballot = GLAD_GL_ARB_shader_ballot; 149 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -243,18 +156,30 @@ Device::Device() {
243 has_precise_bug = TestPreciseBug(); 156 has_precise_bug = TestPreciseBug();
244 has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); 157 has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
245 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; 158 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
159 has_derivative_control = GLAD_GL_ARB_derivative_control;
246 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; 160 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
247 has_debugging_tool_attached = IsDebugToolAttached(extensions); 161 has_debugging_tool_attached = IsDebugToolAttached(extensions);
248 has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); 162 has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
163 has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
164 has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
165 has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
166 has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
167 has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
168 warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
169 need_fastmath_off = is_nvidia;
249 170
250 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive 171 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
251 // uniform buffers as "push constants" 172 // uniform buffers as "push constants"
252 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; 173 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
253 174
254 use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && 175 shader_backend = Settings::values.shader_backend.GetValue();
176 use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
255 GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && 177 GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
256 GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; 178 GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
257 179 if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
180 LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
181 shader_backend = Settings::ShaderBackend::GLSL;
182 }
258 // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 183 // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
259 use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && 184 use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
260 !(is_amd || (is_intel && !is_linux)); 185 !(is_amd || (is_intel && !is_linux));
@@ -265,11 +190,6 @@ Device::Device() {
265 LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); 190 LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
266 LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", 191 LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
267 has_broken_texture_view_formats); 192 has_broken_texture_view_formats);
268
269 if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
270 LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
271 }
272
273 if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { 193 if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) {
274 LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); 194 LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported");
275 } 195 }
@@ -325,22 +245,6 @@ std::string Device::GetVendorName() const {
325 return vendor_name; 245 return vendor_name;
326} 246}
327 247
328Device::Device(std::nullptr_t) {
329 max_uniform_buffers.fill(std::numeric_limits<u32>::max());
330 uniform_buffer_alignment = 4;
331 shader_storage_alignment = 4;
332 max_vertex_attributes = 16;
333 max_varyings = 15;
334 max_compute_shared_memory_size = 0x10000;
335 has_warp_intrinsics = true;
336 has_shader_ballot = true;
337 has_vertex_viewport_layer = true;
338 has_image_load_formatted = true;
339 has_texture_shadow_lod = true;
340 has_variable_aoffi = true;
341 has_depth_buffer_float = true;
342}
343
344bool Device::TestVariableAoffi() { 248bool Device::TestVariableAoffi() {
345 return TestProgram(R"(#version 430 core 249 return TestProgram(R"(#version 430 core
346// This is a unit test, please ignore me on apitrace bug reports. 250// This is a unit test, please ignore me on apitrace bug reports.
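Note: BuildMaxUniformBuffers now fills the whole per-stage limit table with a single std::ranges::transform over the pname array, replacing the old per-stage base-binding arithmetic removed above. A runnable sketch of that shape, with the GL query stubbed out and placeholder pname values:

#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>

using GLenum = unsigned int;

// Placeholders for the GL_MAX_*_UNIFORM_BLOCKS pnames queried in the diff.
constexpr std::array<GLenum, 6> LIMIT_UBOS{0, 1, 2, 3, 4, 5};

std::uint32_t GetInteger(GLenum /*pname*/) {
    return 14; // a real Device calls glGetIntegerv(pname, ...) here
}

int main() {
    std::array<std::uint32_t, 6> max_uniform_buffers{};
    std::ranges::transform(LIMIT_UBOS, max_uniform_buffers.begin(), GetInteger);
    for (const std::uint32_t limit : max_uniform_buffers) {
        std::cout << limit << ' ';
    }
    std::cout << '\n'; // prints "14 14 14 14 14 14"
}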
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2c2b13767..ee992aed4 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -6,34 +6,22 @@
6 6
7#include <cstddef> 7#include <cstddef>
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "video_core/engines/shader_type.h" 9#include "shader_recompiler/stage.h"
10
11namespace Settings {
12enum class ShaderBackend : u32;
13};
10 14
11namespace OpenGL { 15namespace OpenGL {
12 16
13class Device { 17class Device {
14public: 18public:
15 struct BaseBindings {
16 u32 uniform_buffer{};
17 u32 shader_storage_buffer{};
18 u32 sampler{};
19 u32 image{};
20 };
21
22 explicit Device(); 19 explicit Device();
23 explicit Device(std::nullptr_t);
24 20
25 [[nodiscard]] std::string GetVendorName() const; 21 [[nodiscard]] std::string GetVendorName() const;
26 22
27 u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { 23 u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
28 return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; 24 return max_uniform_buffers[static_cast<size_t>(stage)];
29 }
30
31 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
32 return base_bindings[stage_index];
33 }
34
35 const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
36 return GetBaseBindings(static_cast<std::size_t>(shader_type));
37 } 25 }
38 26
39 size_t GetUniformBufferAlignment() const { 27 size_t GetUniformBufferAlignment() const {
@@ -56,6 +44,10 @@ public:
56 return max_compute_shared_memory_size; 44 return max_compute_shared_memory_size;
57 } 45 }
58 46
47 u32 GetMaxGLASMStorageBufferBlocks() const {
48 return max_glasm_storage_buffer_blocks;
49 }
50
59 bool HasWarpIntrinsics() const { 51 bool HasWarpIntrinsics() const {
60 return has_warp_intrinsics; 52 return has_warp_intrinsics;
61 } 53 }
@@ -108,6 +100,10 @@ public:
108 return has_nv_viewport_array2; 100 return has_nv_viewport_array2;
109 } 101 }
110 102
103 bool HasDerivativeControl() const {
104 return has_derivative_control;
105 }
106
111 bool HasDebuggingToolAttached() const { 107 bool HasDebuggingToolAttached() const {
112 return has_debugging_tool_attached; 108 return has_debugging_tool_attached;
113 } 109 }
@@ -128,18 +124,52 @@ public:
128 return has_depth_buffer_float; 124 return has_depth_buffer_float;
129 } 125 }
130 126
127 bool HasGeometryShaderPassthrough() const {
128 return has_geometry_shader_passthrough;
129 }
130
131 bool HasNvGpuShader5() const {
132 return has_nv_gpu_shader_5;
133 }
134
135 bool HasShaderInt64() const {
136 return has_shader_int64;
137 }
138
139 bool HasAmdShaderHalfFloat() const {
140 return has_amd_shader_half_float;
141 }
142
143 bool HasSparseTexture2() const {
144 return has_sparse_texture_2;
145 }
146
147 bool IsWarpSizePotentiallyLargerThanGuest() const {
148 return warp_size_potentially_larger_than_guest;
149 }
150
151 bool NeedsFastmathOff() const {
152 return need_fastmath_off;
153 }
154
155 Settings::ShaderBackend GetShaderBackend() const {
156 return shader_backend;
157 }
158
131private: 159private:
132 static bool TestVariableAoffi(); 160 static bool TestVariableAoffi();
133 static bool TestPreciseBug(); 161 static bool TestPreciseBug();
134 162
135 std::string vendor_name; 163 std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{};
136 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
137 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
138 size_t uniform_buffer_alignment{}; 164 size_t uniform_buffer_alignment{};
139 size_t shader_storage_alignment{}; 165 size_t shader_storage_alignment{};
140 u32 max_vertex_attributes{}; 166 u32 max_vertex_attributes{};
141 u32 max_varyings{}; 167 u32 max_varyings{};
142 u32 max_compute_shared_memory_size{}; 168 u32 max_compute_shared_memory_size{};
169 u32 max_glasm_storage_buffer_blocks{};
170
171 Settings::ShaderBackend shader_backend{};
172
143 bool has_warp_intrinsics{}; 173 bool has_warp_intrinsics{};
144 bool has_shader_ballot{}; 174 bool has_shader_ballot{};
145 bool has_vertex_viewport_layer{}; 175 bool has_vertex_viewport_layer{};
@@ -153,11 +183,21 @@ private:
153 bool has_broken_texture_view_formats{}; 183 bool has_broken_texture_view_formats{};
154 bool has_fast_buffer_sub_data{}; 184 bool has_fast_buffer_sub_data{};
155 bool has_nv_viewport_array2{}; 185 bool has_nv_viewport_array2{};
186 bool has_derivative_control{};
156 bool has_debugging_tool_attached{}; 187 bool has_debugging_tool_attached{};
157 bool use_assembly_shaders{}; 188 bool use_assembly_shaders{};
158 bool use_asynchronous_shaders{}; 189 bool use_asynchronous_shaders{};
159 bool use_driver_cache{}; 190 bool use_driver_cache{};
160 bool has_depth_buffer_float{}; 191 bool has_depth_buffer_float{};
192 bool has_geometry_shader_passthrough{};
193 bool has_nv_gpu_shader_5{};
194 bool has_shader_int64{};
195 bool has_amd_shader_half_float{};
196 bool has_sparse_texture_2{};
197 bool warp_size_potentially_larger_than_guest{};
198 bool need_fastmath_off{};
199
200 std::string vendor_name;
161}; 201};
162 202
163} // namespace OpenGL 203} // namespace OpenGL
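Note: the header now stores the selected Settings::ShaderBackend and exposes it through GetShaderBackend(); as the gl_device.cpp hunk above shows, a GLASM request falls back to GLSL with an error log when the NV assembly-program extensions are missing. A minimal sketch of that selection, with the extension checks collapsed into one flag for illustration:

#include <iostream>

enum class ShaderBackend { GLSL, GLASM, SPIRV };

ShaderBackend ResolveBackend(ShaderBackend requested, bool has_nv_program_extensions) {
    // Mirrors the fallback in Device(): GLASM needs GL_NV_gpu_program5 and friends.
    if (requested == ShaderBackend::GLASM && !has_nv_program_extensions) {
        std::cerr << "Assembly shaders enabled but not supported\n";
        return ShaderBackend::GLSL;
    }
    return requested;
}

int main() {
    std::cout << static_cast<int>(ResolveBackend(ShaderBackend::GLASM, false)) << '\n'; // 0 (GLSL)
    std::cout << static_cast<int>(ResolveBackend(ShaderBackend::GLASM, true)) << '\n';  // 1 (GLASM)
}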
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
new file mode 100644
index 000000000..fac0034fb
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -0,0 +1,572 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <string>
8#include <vector>
9
10#include "common/settings.h" // for enum class Settings::ShaderBackend
11#include "common/thread_worker.h"
12#include "shader_recompiler/shader_info.h"
13#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
14#include "video_core/renderer_opengl/gl_shader_manager.h"
15#include "video_core/renderer_opengl/gl_shader_util.h"
16#include "video_core/renderer_opengl/gl_state_tracker.h"
17#include "video_core/shader_notify.h"
18#include "video_core/texture_cache/texture_cache.h"
19
20#if defined(_MSC_VER) && defined(NDEBUG)
21#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
22#else
23#define LAMBDA_FORCEINLINE
24#endif
25
26namespace OpenGL {
27namespace {
28using Shader::ImageBufferDescriptor;
29using Shader::ImageDescriptor;
30using Shader::TextureBufferDescriptor;
31using Shader::TextureDescriptor;
32using Tegra::Texture::TexturePair;
33using VideoCommon::ImageId;
34
35constexpr u32 MAX_TEXTURES = 64;
36constexpr u32 MAX_IMAGES = 8;
37
38template <typename Range>
39u32 AccumulateCount(const Range& range) {
40 u32 num{};
41 for (const auto& desc : range) {
42 num += desc.count;
43 }
44 return num;
45}
46
47GLenum Stage(size_t stage_index) {
48 switch (stage_index) {
49 case 0:
50 return GL_VERTEX_SHADER;
51 case 1:
52 return GL_TESS_CONTROL_SHADER;
53 case 2:
54 return GL_TESS_EVALUATION_SHADER;
55 case 3:
56 return GL_GEOMETRY_SHADER;
57 case 4:
58 return GL_FRAGMENT_SHADER;
59 }
60 UNREACHABLE_MSG("{}", stage_index);
61 return GL_NONE;
62}
63
64GLenum AssemblyStage(size_t stage_index) {
65 switch (stage_index) {
66 case 0:
67 return GL_VERTEX_PROGRAM_NV;
68 case 1:
69 return GL_TESS_CONTROL_PROGRAM_NV;
70 case 2:
71 return GL_TESS_EVALUATION_PROGRAM_NV;
72 case 3:
73 return GL_GEOMETRY_PROGRAM_NV;
74 case 4:
75 return GL_FRAGMENT_PROGRAM_NV;
76 }
77 UNREACHABLE_MSG("{}", stage_index);
78 return GL_NONE;
79}
80
81/// Translates hardware transform feedback indices
82/// @param location Hardware location
83/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
84/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
85std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
86 const u8 index = location / 4;
87 if (index >= 8 && index <= 39) {
88 return {GL_GENERIC_ATTRIB_NV, index - 8};
89 }
90 if (index >= 48 && index <= 55) {
91 return {GL_TEXTURE_COORD_NV, index - 48};
92 }
93 switch (index) {
94 case 7:
95 return {GL_POSITION, 0};
96 case 40:
97 return {GL_PRIMARY_COLOR_NV, 0};
98 case 41:
99 return {GL_SECONDARY_COLOR_NV, 0};
100 case 42:
101 return {GL_BACK_PRIMARY_COLOR_NV, 0};
102 case 43:
103 return {GL_BACK_SECONDARY_COLOR_NV, 0};
104 }
105 UNIMPLEMENTED_MSG("index={}", index);
106 return {GL_POSITION, 0};
107}
108
109template <typename Spec>
110bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
111 for (size_t stage = 0; stage < stage_infos.size(); ++stage) {
112 if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) {
113 return false;
114 }
115 const auto& info{stage_infos[stage]};
116 if constexpr (!Spec::has_storage_buffers) {
117 if (!info.storage_buffers_descriptors.empty()) {
118 return false;
119 }
120 }
121 if constexpr (!Spec::has_texture_buffers) {
122 if (!info.texture_buffer_descriptors.empty()) {
123 return false;
124 }
125 }
126 if constexpr (!Spec::has_image_buffers) {
127 if (!info.image_buffer_descriptors.empty()) {
128 return false;
129 }
130 }
131 if constexpr (!Spec::has_images) {
132 if (!info.image_descriptors.empty()) {
133 return false;
134 }
135 }
136 }
137 return true;
138}
139
140using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
141
142template <typename Spec, typename... Specs>
143ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
144 if constexpr (sizeof...(Specs) > 0) {
145 if (!Passes<Spec>(stage_infos, enabled_mask)) {
146 return FindSpec<Specs...>(stage_infos, enabled_mask);
147 }
148 }
149 return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
150}
151
152struct SimpleVertexFragmentSpec {
153 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
154 static constexpr bool has_storage_buffers = false;
155 static constexpr bool has_texture_buffers = false;
156 static constexpr bool has_image_buffers = false;
157 static constexpr bool has_images = false;
158};
159
160struct SimpleVertexSpec {
161 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
162 static constexpr bool has_storage_buffers = false;
163 static constexpr bool has_texture_buffers = false;
164 static constexpr bool has_image_buffers = false;
165 static constexpr bool has_images = false;
166};
167
168struct DefaultSpec {
169 static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
170 static constexpr bool has_storage_buffers = true;
171 static constexpr bool has_texture_buffers = true;
172 static constexpr bool has_image_buffers = true;
173 static constexpr bool has_images = true;
174};
175
176ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) {
177 return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask);
178}
179} // Anonymous namespace
180
181GraphicsPipeline::GraphicsPipeline(
182 const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_,
183 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
184 ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker,
185 VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources,
186 std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos,
187 const GraphicsPipelineKey& key_)
188 : texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
189 gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
190 state_tracker{state_tracker_}, key{key_} {
191 if (shader_notify) {
192 shader_notify->MarkShaderBuilding();
193 }
194 u32 num_textures{};
195 u32 num_images{};
196 u32 num_storage_buffers{};
197 for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
198 auto& info{stage_infos[stage]};
199 if (infos[stage]) {
200 info = *infos[stage];
201 enabled_stages_mask |= 1u << stage;
202 }
203 if (stage < 4) {
204 base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
205 base_storage_bindings[stage + 1] = base_storage_bindings[stage];
206
207 base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
208 base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
209 }
210 enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
211 std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
212
213 const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
214 num_texture_buffers[stage] += num_tex_buffer_bindings;
215 num_textures += num_tex_buffer_bindings;
216
217 const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
218 num_image_buffers[stage] += num_img_buffers_bindings;
219 num_images += num_img_buffers_bindings;
220
221 num_textures += AccumulateCount(info.texture_descriptors);
222 num_images += AccumulateCount(info.image_descriptors);
223 num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
224
225 writes_global_memory |= std::ranges::any_of(
226 info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
227 }
228 ASSERT(num_textures <= MAX_TEXTURES);
229 ASSERT(num_images <= MAX_IMAGES);
230
231 const bool assembly_shaders{assembly_programs[0].handle != 0};
232 use_storage_buffers =
233 !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
234 writes_global_memory &= !use_storage_buffers;
235 configure_func = ConfigureFunc(stage_infos, enabled_stages_mask);
236
237 if (key.xfb_enabled && device.UseAssemblyShaders()) {
238 GenerateTransformFeedbackState();
239 }
240 const bool in_parallel = thread_worker != nullptr;
241 const auto backend = device.GetShaderBackend();
242 auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
243 shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
244 for (size_t stage = 0; stage < 5; ++stage) {
245 switch (backend) {
246 case Settings::ShaderBackend::GLSL:
247 if (!sources[stage].empty()) {
248 source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
249 }
250 break;
251 case Settings::ShaderBackend::GLASM:
252 if (!sources[stage].empty()) {
253 assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
254 if (in_parallel) {
255 // Make sure program is built before continuing when building in parallel
256 glGetString(GL_PROGRAM_ERROR_STRING_NV);
257 }
258 }
259 break;
260 case Settings::ShaderBackend::SPIRV:
261 if (!sources_spirv[stage].empty()) {
262 source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage));
263 }
264 break;
265 }
266 }
267 if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
268 // Make sure programs have built if we are building shaders in parallel
269 for (OGLProgram& program : source_programs) {
270 if (program.handle != 0) {
271 GLint status{};
272 glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
273 }
274 }
275 }
276 if (shader_notify) {
277 shader_notify->MarkShaderComplete();
278 }
279 is_built = true;
280 built_condvar.notify_one();
281 }};
282 if (thread_worker) {
283 thread_worker->QueueWork(std::move(func));
284 } else {
285 func(nullptr);
286 }
287}
288
289template <typename Spec>
290void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
291 std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
292 std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
293 std::array<GLuint, MAX_TEXTURES> samplers;
294 size_t image_view_index{};
295 GLsizei sampler_binding{};
296
297 texture_cache.SynchronizeGraphicsDescriptors();
298
299 buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
300 buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
301 buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
302 buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
303
304 const auto& regs{maxwell3d.regs};
305 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
306 const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
307 const Shader::Info& info{stage_infos[stage]};
308 buffer_cache.UnbindGraphicsStorageBuffers(stage);
309 if constexpr (Spec::has_storage_buffers) {
310 size_t ssbo_index{};
311 for (const auto& desc : info.storage_buffers_descriptors) {
312 ASSERT(desc.count == 1);
313 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
314 desc.cbuf_offset, desc.is_written);
315 ++ssbo_index;
316 }
317 }
318 const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
319 const auto read_handle{[&](const auto& desc, u32 index) {
320 ASSERT(cbufs[desc.cbuf_index].enabled);
321 const u32 index_offset{index << desc.size_shift};
322 const u32 offset{desc.cbuf_offset + index_offset};
323 const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
324 if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> ||
325 std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) {
326 if (desc.has_secondary) {
327 ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
328 const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
329 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
330 second_offset};
331 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
332 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
333 const u32 raw{lhs_raw | rhs_raw};
334 return TexturePair(raw, via_header_index);
335 }
336 }
337 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
338 }};
339 const auto add_image{[&](const auto& desc) {
340 for (u32 index = 0; index < desc.count; ++index) {
341 const auto handle{read_handle(desc, index)};
342 image_view_indices[image_view_index++] = handle.first;
343 }
344 }};
345 if constexpr (Spec::has_texture_buffers) {
346 for (const auto& desc : info.texture_buffer_descriptors) {
347 for (u32 index = 0; index < desc.count; ++index) {
348 const auto handle{read_handle(desc, index)};
349 image_view_indices[image_view_index++] = handle.first;
350 samplers[sampler_binding++] = 0;
351 }
352 }
353 }
354 if constexpr (Spec::has_image_buffers) {
355 for (const auto& desc : info.image_buffer_descriptors) {
356 add_image(desc);
357 }
358 }
359 for (const auto& desc : info.texture_descriptors) {
360 for (u32 index = 0; index < desc.count; ++index) {
361 const auto handle{read_handle(desc, index)};
362 image_view_indices[image_view_index++] = handle.first;
363
364 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
365 samplers[sampler_binding++] = sampler->Handle();
366 }
367 }
368 if constexpr (Spec::has_images) {
369 for (const auto& desc : info.image_descriptors) {
370 add_image(desc);
371 }
372 }
373 }};
374 if constexpr (Spec::enabled_stages[0]) {
375 config_stage(0);
376 }
377 if constexpr (Spec::enabled_stages[1]) {
378 config_stage(1);
379 }
380 if constexpr (Spec::enabled_stages[2]) {
381 config_stage(2);
382 }
383 if constexpr (Spec::enabled_stages[3]) {
384 config_stage(3);
385 }
386 if constexpr (Spec::enabled_stages[4]) {
387 config_stage(4);
388 }
389 const std::span indices_span(image_view_indices.data(), image_view_index);
390 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
391
392 texture_cache.UpdateRenderTargets(false);
393 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
394
395 ImageId* texture_buffer_index{image_view_ids.data()};
396 const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
397 size_t index{};
398 const auto add_buffer{[&](const auto& desc) {
399 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
400 for (u32 i = 0; i < desc.count; ++i) {
401 bool is_written{false};
402 if constexpr (is_image) {
403 is_written = desc.is_written;
404 }
405 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
406 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
407 image_view.BufferSize(), image_view.format,
408 is_written, is_image);
409 ++index;
410 ++texture_buffer_index;
411 }
412 }};
413 const Shader::Info& info{stage_infos[stage]};
414 buffer_cache.UnbindGraphicsTextureBuffers(stage);
415
416 if constexpr (Spec::has_texture_buffers) {
417 for (const auto& desc : info.texture_buffer_descriptors) {
418 add_buffer(desc);
419 }
420 }
421 if constexpr (Spec::has_image_buffers) {
422 for (const auto& desc : info.image_buffer_descriptors) {
423 add_buffer(desc);
424 }
425 }
426 for (const auto& desc : info.texture_descriptors) {
427 texture_buffer_index += desc.count;
428 }
429 if constexpr (Spec::has_images) {
430 for (const auto& desc : info.image_descriptors) {
431 texture_buffer_index += desc.count;
432 }
433 }
434 }};
435 if constexpr (Spec::enabled_stages[0]) {
436 bind_stage_info(0);
437 }
438 if constexpr (Spec::enabled_stages[1]) {
439 bind_stage_info(1);
440 }
441 if constexpr (Spec::enabled_stages[2]) {
442 bind_stage_info(2);
443 }
444 if constexpr (Spec::enabled_stages[3]) {
445 bind_stage_info(3);
446 }
447 if constexpr (Spec::enabled_stages[4]) {
448 bind_stage_info(4);
449 }
450 buffer_cache.UpdateGraphicsBuffers(is_indexed);
451 buffer_cache.BindHostGeometryBuffers(is_indexed);
452
453 if (!is_built.load(std::memory_order::relaxed)) {
454 WaitForBuild();
455 }
456 if (assembly_programs[0].handle != 0) {
457 program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
458 } else {
459 program_manager.BindSourcePrograms(source_programs);
460 }
461 const ImageId* views_it{image_view_ids.data()};
462 GLsizei texture_binding = 0;
463 GLsizei image_binding = 0;
464 std::array<GLuint, MAX_TEXTURES> textures;
465 std::array<GLuint, MAX_IMAGES> images;
466 const auto prepare_stage{[&](size_t stage) {
467 buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
468 buffer_cache.BindHostStageBuffers(stage);
469
470 texture_binding += num_texture_buffers[stage];
471 image_binding += num_image_buffers[stage];
472
473 views_it += num_texture_buffers[stage];
474 views_it += num_image_buffers[stage];
475
476 const auto& info{stage_infos[stage]};
477 for (const auto& desc : info.texture_descriptors) {
478 for (u32 index = 0; index < desc.count; ++index) {
479 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
480 textures[texture_binding++] = image_view.Handle(desc.type);
481 }
482 }
483 for (const auto& desc : info.image_descriptors) {
484 for (u32 index = 0; index < desc.count; ++index) {
485 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
486 if (desc.is_written) {
487 texture_cache.MarkModification(image_view.image_id);
488 }
489 images[image_binding++] = image_view.StorageView(desc.type, desc.format);
490 }
491 }
492 }};
493 if constexpr (Spec::enabled_stages[0]) {
494 prepare_stage(0);
495 }
496 if constexpr (Spec::enabled_stages[1]) {
497 prepare_stage(1);
498 }
499 if constexpr (Spec::enabled_stages[2]) {
500 prepare_stage(2);
501 }
502 if constexpr (Spec::enabled_stages[3]) {
503 prepare_stage(3);
504 }
505 if constexpr (Spec::enabled_stages[4]) {
506 prepare_stage(4);
507 }
508 if (texture_binding != 0) {
509 ASSERT(texture_binding == sampler_binding);
510 glBindTextures(0, texture_binding, textures.data());
511 glBindSamplers(0, sampler_binding, samplers.data());
512 }
513 if (image_binding != 0) {
514 glBindImageTextures(0, image_binding, images.data());
515 }
516}
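The handle reads performed by read_handle above can be shown in isolation. A minimal sketch, assuming a 32-bit packed Maxwell texture handle whose low bits index the image (TIC) table and whose high bits index the sampler (TSC) table; the 20/12 bit split and the helper names below are illustrative, not the exact layout used by the emulator:

// Illustrative only: mirrors the lhs | rhs combination done for separated
// samplers and the "via header index" sampler selection seen in read_handle.
#include <cstdint>
#include <utility>

// A guest constant buffer viewed as an array of 32-bit words.
struct ConstBufferView {
    const std::uint32_t* words;
};

// Reads the raw handle word; with separated samplers the primary and
// secondary constant buffer words are OR-combined into a single handle.
inline std::uint32_t ReadRawHandle(ConstBufferView primary, std::uint32_t offset,
                                   const ConstBufferView* secondary = nullptr,
                                   std::uint32_t secondary_offset = 0) {
    const std::uint32_t lhs = primary.words[offset / 4];
    const std::uint32_t rhs = secondary ? secondary->words[secondary_offset / 4] : 0;
    return lhs | rhs;
}

// Splits a raw handle into {image index, sampler index}; when handles are
// addressed via the header index, the sampler reuses the image index.
inline std::pair<std::uint32_t, std::uint32_t> SplitHandle(std::uint32_t raw,
                                                           bool via_header_index) {
    const std::uint32_t image = raw & 0xfffffu;                         // illustrative 20-bit TIC id
    const std::uint32_t sampler = via_header_index ? image : raw >> 20; // illustrative TSC id
    return {image, sampler};
}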
517
518void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
519 glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides,
520 xfb_streams.data(), GL_INTERLEAVED_ATTRIBS);
521}
522
523void GraphicsPipeline::GenerateTransformFeedbackState() {
524 // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
525 // when this is required.
526 GLint* cursor{xfb_attribs.data()};
527 GLint* current_stream{xfb_streams.data()};
528
529 for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
530 const auto& layout = key.xfb_state.layouts[feedback];
531 UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
532 if (layout.varying_count == 0) {
533 continue;
534 }
535 *current_stream = static_cast<GLint>(feedback);
536 if (current_stream != xfb_streams.data()) {
537 // When stepping one stream, push the expected token
538 cursor[0] = GL_NEXT_BUFFER_NV;
539 cursor[1] = 0;
540 cursor[2] = 0;
541 cursor += XFB_ENTRY_STRIDE;
542 }
543 ++current_stream;
544
545 const auto& locations = key.xfb_state.varyings[feedback];
546 std::optional<u8> current_index;
547 for (u32 offset = 0; offset < layout.varying_count; ++offset) {
548 const u8 location = locations[offset];
549 const u8 index = location / 4;
550
551 if (current_index == index) {
552 // Increase number of components of the previous attachment
553 ++cursor[-2];
554 continue;
555 }
556 current_index = index;
557
558 std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
559 cursor[1] = 1;
560 cursor += XFB_ENTRY_STRIDE;
561 }
562 }
563 num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE);
564 num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data());
565}
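For reference, glTransformFeedbackStreamAttribsNV consumes the array built above as GLint triplets of the form {attribute enum, component count, attribute index}, with a {GL_NEXT_BUFFER_NV, 0, 0} triplet inserted whenever the stream moves to the next buffer, which is exactly what the cursor writes do. A hand-written sketch of such a stream (values illustrative) for two buffers, one capturing gl_Position and one capturing a 3-component generic attribute:

#include <array>
#include <glad/glad.h>

// Each entry is {attribute, number of components, index}; the count passed to
// the NV call is the number of triplets, including the buffer separator.
constexpr std::array<GLint, 3 * 3> example_attribs{
    GL_POSITION,          4, 0, // buffer 0: gl_Position.xyzw
    GL_NEXT_BUFFER_NV,    0, 0, // advance to the next transform feedback buffer
    GL_GENERIC_ATTRIB_NV, 3, 5, // buffer 1: generic attribute 5, three components
};
constexpr std::array<GLint, 2> example_streams{0, 1};

void ConfigureExampleTransformFeedback() {
    glTransformFeedbackStreamAttribsNV(static_cast<GLsizei>(example_attribs.size() / 3),
                                       example_attribs.data(),
                                       static_cast<GLsizei>(example_streams.size()),
                                       example_streams.data(), GL_INTERLEAVED_ATTRIBS);
}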
566
567void GraphicsPipeline::WaitForBuild() {
568 std::unique_lock lock{built_mutex};
569 built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
570}
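WaitForBuild is the consumer half of a standard mutex/condition-variable handshake. The producer half is not part of this hunk; a sketch of what the signalling side presumably looks like, reusing the member names declared in gl_graphics_pipeline.h (the SignalBuilt name itself is hypothetical):

#include <atomic>
#include <condition_variable>
#include <mutex>

struct BuildSignal {
    std::mutex built_mutex;
    std::condition_variable built_condvar;
    std::atomic_bool is_built{false};

    // Producer side: called once compilation/linking has finished.
    void SignalBuilt() {
        std::scoped_lock lock{built_mutex};
        is_built.store(true, std::memory_order::relaxed);
        built_condvar.notify_all(); // wakes any thread blocked in Wait()
    }

    // Consumer side: same pattern as GraphicsPipeline::WaitForBuild above.
    void Wait() {
        std::unique_lock lock{built_mutex};
        built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
    }
};

Notifying while still holding built_mutex is what makes the relaxed atomic safe here: a waiter either observes is_built as true before it blocks, or it is already blocked and guaranteed to receive the notification, so no wake-up can be lost.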
571
572} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
new file mode 100644
index 000000000..4e28d9a42
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstring>
9#include <type_traits>
10#include <utility>
11
12#include "common/bit_field.h"
13#include "common/cityhash.h"
14#include "common/common_types.h"
15#include "shader_recompiler/shader_info.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/memory_manager.h"
18#include "video_core/renderer_opengl/gl_buffer_cache.h"
19#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/renderer_opengl/gl_texture_cache.h"
21#include "video_core/transform_feedback.h"
22
23namespace OpenGL {
24
25namespace ShaderContext {
26struct Context;
27}
28
29class Device;
30class ProgramManager;
31
32using Maxwell = Tegra::Engines::Maxwell3D::Regs;
33using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
34
35struct GraphicsPipelineKey {
36 std::array<u64, 6> unique_hashes;
37 union {
38 u32 raw;
39 BitField<0, 1, u32> xfb_enabled;
40 BitField<1, 1, u32> early_z;
41 BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
42 BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
43 BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
44 BitField<10, 1, u32> tessellation_clockwise;
45 };
46 std::array<u32, 3> padding;
47 VideoCommon::TransformFeedbackState xfb_state;
48
49 size_t Hash() const noexcept {
50 return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
51 }
52
53 bool operator==(const GraphicsPipelineKey& rhs) const noexcept {
54 return std::memcmp(this, &rhs, Size()) == 0;
55 }
56
57 bool operator!=(const GraphicsPipelineKey& rhs) const noexcept {
58 return !operator==(rhs);
59 }
60
61 [[nodiscard]] size_t Size() const noexcept {
62 if (xfb_enabled != 0) {
63 return sizeof(GraphicsPipelineKey);
64 } else {
65 return offsetof(GraphicsPipelineKey, padding);
66 }
67 }
68};
69static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
70static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>);
71static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
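These static_asserts are what make the byte-wise Hash() and memcmp-based operator== above well defined: no indeterminate padding bytes and a trivially copyable layout. A minimal sketch of the same idiom on a smaller key, using the same Common::CityHash64 helper pulled in from common/cityhash.h; hashing and comparison only cover Size() bytes, so the trailing state is ignored whenever the feature that uses it is disabled:

#include <cstddef>
#include <cstring>
#include <type_traits>
#include "common/cityhash.h"
#include "common/common_types.h"

struct TinyKey {
    u64 unique_hash;
    u32 feature_enabled;
    u32 padding;        // explicit padding keeps object representations unique
    u64 optional_state; // only meaningful when feature_enabled != 0

    size_t Size() const noexcept {
        return feature_enabled != 0 ? sizeof(TinyKey) : offsetof(TinyKey, optional_state);
    }
    size_t Hash() const noexcept {
        return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
    }
    bool operator==(const TinyKey& rhs) const noexcept {
        return std::memcmp(this, &rhs, Size()) == 0;
    }
};
static_assert(std::has_unique_object_representations_v<TinyKey>);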
72
73class GraphicsPipeline {
74public:
75 explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
76 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
77 Tegra::Engines::Maxwell3D& maxwell3d_,
78 ProgramManager& program_manager_, StateTracker& state_tracker_,
79 ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify,
80 std::array<std::string, 5> sources,
81 std::array<std::vector<u32>, 5> sources_spirv,
82 const std::array<const Shader::Info*, 5>& infos,
83 const GraphicsPipelineKey& key_);
84
85 void Configure(bool is_indexed) {
86 configure_func(this, is_indexed);
87 }
88
89 void ConfigureTransformFeedback() const {
90 if (num_xfb_attribs != 0) {
91 ConfigureTransformFeedbackImpl();
92 }
93 }
94
95 [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept {
96 return key;
97 }
98
99 [[nodiscard]] bool WritesGlobalMemory() const noexcept {
100 return writes_global_memory;
101 }
102
103 [[nodiscard]] bool IsBuilt() const noexcept {
104 return is_built.load(std::memory_order::relaxed);
105 }
106
107 template <typename Spec>
108 static auto MakeConfigureSpecFunc() {
109 return [](GraphicsPipeline* pipeline, bool is_indexed) {
110 pipeline->ConfigureImpl<Spec>(is_indexed);
111 };
112 }
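The Spec type is not declared in this header; it is a compile-time description of which stages and resource classes a pipeline actually uses, so the if constexpr branches in ConfigureImpl compile away for pipelines that do not need them. A hypothetical Spec consistent with the members the template reads (the concrete values are just an example for a vertex-plus-fragment pipeline that uses textures only):

#include <array>

struct ExampleSpec {
    // Stage order used by the pipeline: vertex, tess control, tess eval, geometry, fragment.
    static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
    static constexpr bool has_storage_buffers = false;
    static constexpr bool has_texture_buffers = false;
    static constexpr bool has_image_buffers = false;
    static constexpr bool has_images = false;
};

// Selecting the specialised configure function when the pipeline is created:
// configure_func = GraphicsPipeline::MakeConfigureSpecFunc<ExampleSpec>();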
113
114private:
115 template <typename Spec>
116 void ConfigureImpl(bool is_indexed);
117
118 void ConfigureTransformFeedbackImpl() const;
119
120 void GenerateTransformFeedbackState();
121
122 void WaitForBuild();
123
124 TextureCache& texture_cache;
125 BufferCache& buffer_cache;
126 Tegra::MemoryManager& gpu_memory;
127 Tegra::Engines::Maxwell3D& maxwell3d;
128 ProgramManager& program_manager;
129 StateTracker& state_tracker;
130 const GraphicsPipelineKey key;
131
132 void (*configure_func)(GraphicsPipeline*, bool){};
133
134 std::array<OGLProgram, 5> source_programs;
135 std::array<OGLAssemblyProgram, 5> assembly_programs;
136 u32 enabled_stages_mask{};
137
138 std::array<Shader::Info, 5> stage_infos{};
139 std::array<u32, 5> enabled_uniform_buffer_masks{};
140 VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
141 std::array<u32, 5> base_uniform_bindings{};
142 std::array<u32, 5> base_storage_bindings{};
143 std::array<u32, 5> num_texture_buffers{};
144 std::array<u32, 5> num_image_buffers{};
145
146 bool use_storage_buffers{};
147 bool writes_global_memory{};
148
149 static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
150 GLsizei num_xfb_attribs{};
151 GLsizei num_xfb_strides{};
152 std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
153 std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
154
155 std::mutex built_mutex;
156 std::condition_variable built_condvar;
157 std::atomic_bool is_built{false};
158};
159
160} // namespace OpenGL
161
162namespace std {
163template <>
164struct hash<OpenGL::GraphicsPipelineKey> {
165 size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept {
166 return k.Hash();
167 }
168};
169} // namespace std
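With the std::hash specialisation above, the key can be used directly in unordered containers; the shader cache presumably keeps a map along these lines (the exact container lives in gl_shader_cache.h and may differ):

#include <memory>
#include <unordered_map>
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"

std::unordered_map<OpenGL::GraphicsPipelineKey, std::unique_ptr<OpenGL::GraphicsPipeline>>
    graphics_cache;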
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ceb3abcb2..41d2b73f4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -23,7 +23,6 @@
23#include "core/memory.h" 23#include "core/memory.h"
24#include "video_core/engines/kepler_compute.h" 24#include "video_core/engines/kepler_compute.h"
25#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
26#include "video_core/engines/shader_type.h"
27#include "video_core/memory_manager.h" 26#include "video_core/memory_manager.h"
28#include "video_core/renderer_opengl/gl_device.h" 27#include "video_core/renderer_opengl/gl_device.h"
29#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
@@ -40,7 +39,6 @@ namespace OpenGL {
40using Maxwell = Tegra::Engines::Maxwell3D::Regs; 39using Maxwell = Tegra::Engines::Maxwell3D::Regs;
41using GLvec4 = std::array<GLfloat, 4>; 40using GLvec4 = std::array<GLfloat, 4>;
42 41
43using Tegra::Engines::ShaderType;
44using VideoCore::Surface::PixelFormat; 42using VideoCore::Surface::PixelFormat;
45using VideoCore::Surface::SurfaceTarget; 43using VideoCore::Surface::SurfaceTarget;
46using VideoCore::Surface::SurfaceType; 44using VideoCore::Surface::SurfaceType;
@@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
51MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); 49MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
52 50
53namespace { 51namespace {
54
55constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; 52constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
56 53
57struct TextureHandle {
58 constexpr TextureHandle(u32 data, bool via_header_index) {
59 const Tegra::Texture::TextureHandle handle{data};
60 image = handle.tic_id;
61 sampler = via_header_index ? image : handle.tsc_id.Value();
62 }
63
64 u32 image;
65 u32 sampler;
66};
67
68template <typename Engine, typename Entry>
69TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
70 ShaderType shader_type, size_t index = 0) {
71 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
72 if (entry.is_separated) {
73 const u32 buffer_1 = entry.buffer;
74 const u32 buffer_2 = entry.secondary_buffer;
75 const u32 offset_1 = entry.offset;
76 const u32 offset_2 = entry.secondary_offset;
77 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
78 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
79 return TextureHandle(handle_1 | handle_2, via_header_index);
80 }
81 }
82 if (entry.is_bindless) {
83 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
84 return TextureHandle(raw, via_header_index);
85 }
86 const u32 buffer = engine.GetBoundBuffer();
87 const u64 offset = (entry.offset + index) * sizeof(u32);
88 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
89}
90
91/// Translates hardware transform feedback indices
92/// @param location Hardware location
93/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
94/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
95std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
96 const u8 index = location / 4;
97 if (index >= 8 && index <= 39) {
98 return {GL_GENERIC_ATTRIB_NV, index - 8};
99 }
100 if (index >= 48 && index <= 55) {
101 return {GL_TEXTURE_COORD_NV, index - 48};
102 }
103 switch (index) {
104 case 7:
105 return {GL_POSITION, 0};
106 case 40:
107 return {GL_PRIMARY_COLOR_NV, 0};
108 case 41:
109 return {GL_SECONDARY_COLOR_NV, 0};
110 case 42:
111 return {GL_BACK_PRIMARY_COLOR_NV, 0};
112 case 43:
113 return {GL_BACK_SECONDARY_COLOR_NV, 0};
114 }
115 UNIMPLEMENTED_MSG("index={}", index);
116 return {GL_POSITION, 0};
117}
118
119void oglEnable(GLenum cap, bool state) { 54void oglEnable(GLenum cap, bool state) {
120 (state ? glEnable : glDisable)(cap); 55 (state ? glEnable : glDisable)(cap);
121} 56}
122
123ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
124 if (entry.is_buffer) {
125 return ImageViewType::Buffer;
126 }
127 switch (entry.type) {
128 case Tegra::Shader::TextureType::Texture1D:
129 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
130 case Tegra::Shader::TextureType::Texture2D:
131 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
132 case Tegra::Shader::TextureType::Texture3D:
133 return ImageViewType::e3D;
134 case Tegra::Shader::TextureType::TextureCube:
135 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
136 }
137 UNREACHABLE();
138 return ImageViewType::e2D;
139}
140
141ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
142 switch (entry.type) {
143 case Tegra::Shader::ImageType::Texture1D:
144 return ImageViewType::e1D;
145 case Tegra::Shader::ImageType::Texture1DArray:
146 return ImageViewType::e1DArray;
147 case Tegra::Shader::ImageType::Texture2D:
148 return ImageViewType::e2D;
149 case Tegra::Shader::ImageType::Texture2DArray:
150 return ImageViewType::e2DArray;
151 case Tegra::Shader::ImageType::Texture3D:
152 return ImageViewType::e3D;
153 case Tegra::Shader::ImageType::TextureBuffer:
154 return ImageViewType::Buffer;
155 }
156 UNREACHABLE();
157 return ImageViewType::e2D;
158}
159
160} // Anonymous namespace 57} // Anonymous namespace
161 58
162RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 59RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
170 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 67 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
171 buffer_cache_runtime(device), 68 buffer_cache_runtime(device),
172 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 69 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
173 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), 70 shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
71 buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
174 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), 72 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
175 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 73 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
176 async_shaders(emu_window_) {
177 if (device.UseAsynchronousShaders()) {
178 async_shaders.AllocateWorkers();
179 }
180}
181 74
182RasterizerOpenGL::~RasterizerOpenGL() = default; 75RasterizerOpenGL::~RasterizerOpenGL() = default;
183 76
@@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() {
204 const auto gl_index = static_cast<GLuint>(index); 97 const auto gl_index = static_cast<GLuint>(index);
205 98
206 // Disable constant attributes. 99 // Disable constant attributes.
207 if (attrib.IsConstant()) { 100 if (attrib.constant) {
208 glDisableVertexAttribArray(gl_index); 101 glDisableVertexAttribArray(gl_index);
209 continue; 102 continue;
210 } 103 }
@@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() {
244 } 137 }
245} 138}
246 139
247void RasterizerOpenGL::SetupShaders(bool is_indexed) {
248 u32 clip_distances = 0;
249
250 std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
251 image_view_indices.clear();
252 sampler_handles.clear();
253
254 texture_cache.SynchronizeGraphicsDescriptors();
255
256 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
257 const auto& shader_config = maxwell3d.regs.shader_config[index];
258 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
259
260 // Skip stages that are not enabled
261 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
262 switch (program) {
263 case Maxwell::ShaderProgram::Geometry:
264 program_manager.UseGeometryShader(0);
265 break;
266 case Maxwell::ShaderProgram::Fragment:
267 program_manager.UseFragmentShader(0);
268 break;
269 default:
270 break;
271 }
272 continue;
273 }
274 // Currently these stages are not supported in the OpenGL backend.
275 // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
276 if (program == Maxwell::ShaderProgram::TesselationControl ||
277 program == Maxwell::ShaderProgram::TesselationEval) {
278 continue;
279 }
280
281 Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
282 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
283 switch (program) {
284 case Maxwell::ShaderProgram::VertexA:
285 case Maxwell::ShaderProgram::VertexB:
286 program_manager.UseVertexShader(program_handle);
287 break;
288 case Maxwell::ShaderProgram::Geometry:
289 program_manager.UseGeometryShader(program_handle);
290 break;
291 case Maxwell::ShaderProgram::Fragment:
292 program_manager.UseFragmentShader(program_handle);
293 break;
294 default:
295 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
296 shader_config.enable.Value(), shader_config.offset);
297 break;
298 }
299
300 // Stage indices are 0 - 5
301 const size_t stage = index == 0 ? 0 : index - 1;
302 shaders[stage] = shader;
303
304 SetupDrawTextures(shader, stage);
305 SetupDrawImages(shader, stage);
306
307 buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
308
309 buffer_cache.UnbindGraphicsStorageBuffers(stage);
310 u32 ssbo_index = 0;
311 for (const auto& buffer : shader->GetEntries().global_memory_entries) {
312 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
313 buffer.cbuf_offset, buffer.is_written);
314 ++ssbo_index;
315 }
316
317 // Workaround for Intel drivers.
318 // When a clip distance is enabled but not set in the shader it crops parts of the screen
319 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
320 // clip distances only when they're written by a shader stage.
321 clip_distances |= shader->GetEntries().clip_distances;
322
323 // When VertexA is enabled, we have dual vertex shaders
324 if (program == Maxwell::ShaderProgram::VertexA) {
325 // VertexB was combined with VertexA, so we skip the VertexB iteration
326 ++index;
327 }
328 }
329 SyncClipEnabled(clip_distances);
330 maxwell3d.dirty.flags[Dirty::Shaders] = false;
331
332 buffer_cache.UpdateGraphicsBuffers(is_indexed);
333
334 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
335 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
336
337 buffer_cache.BindHostGeometryBuffers(is_indexed);
338
339 size_t image_view_index = 0;
340 size_t texture_index = 0;
341 size_t image_index = 0;
342 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
343 const Shader* const shader = shaders[stage];
344 if (!shader) {
345 continue;
346 }
347 buffer_cache.BindHostStageBuffers(stage);
348 const auto& base = device.GetBaseBindings(stage);
349 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
350 texture_index, image_index);
351 }
352}
353
354void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, 140void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
355 const VideoCore::DiskResourceLoadCallback& callback) { 141 const VideoCore::DiskResourceLoadCallback& callback) {
356 shader_cache.LoadDiskCache(title_id, stop_loading, callback); 142 shader_cache.LoadDiskResources(title_id, stop_loading, callback);
357} 143}
358 144
359void RasterizerOpenGL::Clear() { 145void RasterizerOpenGL::Clear() {
@@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
432 218
433 SyncState(); 219 SyncState();
434 220
435 // Setup shaders and their used resources. 221 GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
222 if (!pipeline) {
223 return;
224 }
436 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; 225 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
437 SetupShaders(is_indexed); 226 pipeline->Configure(is_indexed);
438
439 texture_cache.UpdateRenderTargets(false);
440 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
441 program_manager.BindGraphicsPipeline();
442 227
443 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); 228 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
444 BeginTransformFeedback(primitive_mode); 229 BeginTransformFeedback(pipeline, primitive_mode);
445 230
446 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); 231 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
447 const GLsizei num_instances = 232 const GLsizei num_instances =
@@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
480 num_instances, base_instance); 265 num_instances, base_instance);
481 } 266 }
482 } 267 }
483
484 EndTransformFeedback(); 268 EndTransformFeedback();
485 269
486 ++num_queued_commands; 270 ++num_queued_commands;
271 has_written_global_memory |= pipeline->WritesGlobalMemory();
487 272
488 gpu.TickWork(); 273 gpu.TickWork();
489} 274}
490 275
491void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 276void RasterizerOpenGL::DispatchCompute() {
492 Shader* const kernel = shader_cache.GetComputeKernel(code_addr); 277 ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
493 278 if (!pipeline) {
494 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; 279 return;
495 BindComputeTextures(kernel); 280 }
496 281 pipeline->Configure();
497 const auto& entries = kernel->GetEntries(); 282 const auto& qmd{kepler_compute.launch_description};
498 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); 283 glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
499 buffer_cache.UnbindComputeStorageBuffers();
500 u32 ssbo_index = 0;
501 for (const auto& buffer : entries.global_memory_entries) {
502 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
503 buffer.is_written);
504 ++ssbo_index;
505 }
506 buffer_cache.UpdateComputeBuffers();
507 buffer_cache.BindHostComputeBuffers();
508
509 const auto& launch_desc = kepler_compute.launch_description;
510 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
511 ++num_queued_commands; 284 ++num_queued_commands;
285 has_written_global_memory |= pipeline->WritesGlobalMemory();
512} 286}
513 287
514void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { 288void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -661,7 +435,7 @@ void RasterizerOpenGL::WaitForIdle() {
661} 435}
662 436
663void RasterizerOpenGL::FragmentBarrier() { 437void RasterizerOpenGL::FragmentBarrier() {
664 glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); 438 glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
665} 439}
666 440
667void RasterizerOpenGL::TiledCacheBarrier() { 441void RasterizerOpenGL::TiledCacheBarrier() {
@@ -674,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() {
674 return; 448 return;
675 } 449 }
676 num_queued_commands = 0; 450 num_queued_commands = 0;
451
452 // Make sure memory stored from the previous GL command stream is visible
453 // This is only needed on assembly shaders where we write to GPU memory with raw pointers
454 if (has_written_global_memory) {
455 has_written_global_memory = false;
456 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
457 }
677 glFlush(); 458 glFlush();
678} 459}
679 460
@@ -721,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
721 // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); 502 // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
722 // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); 503 // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
723 504
724 screen_info.display_texture = image_view->Handle(ImageViewType::e2D); 505 screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
725 screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); 506 screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
726 return true; 507 return true;
727} 508}
728 509
729void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
730 image_view_indices.clear();
731 sampler_handles.clear();
732
733 texture_cache.SynchronizeComputeDescriptors();
734
735 SetupComputeTextures(kernel);
736 SetupComputeImages(kernel);
737
738 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
739 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
740
741 program_manager.BindCompute(kernel->GetHandle());
742 size_t image_view_index = 0;
743 size_t texture_index = 0;
744 size_t image_index = 0;
745 BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
746}
747
748void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
749 GLuint base_image, size_t& image_view_index,
750 size_t& texture_index, size_t& image_index) {
751 const GLuint* const samplers = sampler_handles.data() + texture_index;
752 const GLuint* const textures = texture_handles.data() + texture_index;
753 const GLuint* const images = image_handles.data() + image_index;
754
755 const size_t num_samplers = entries.samplers.size();
756 for (const auto& sampler : entries.samplers) {
757 for (size_t i = 0; i < sampler.size; ++i) {
758 const ImageViewId image_view_id = image_view_ids[image_view_index++];
759 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
760 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
761 texture_handles[texture_index++] = handle;
762 }
763 }
764 const size_t num_images = entries.images.size();
765 for (size_t unit = 0; unit < num_images; ++unit) {
766 // TODO: Mark as modified
767 const ImageViewId image_view_id = image_view_ids[image_view_index++];
768 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
769 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
770 image_handles[image_index] = handle;
771 ++image_index;
772 }
773 if (num_samplers > 0) {
774 glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
775 glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
776 }
777 if (num_images > 0) {
778 glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
779 }
780}
781
782void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
783 const bool via_header_index =
784 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
785 for (const auto& entry : shader->GetEntries().samplers) {
786 const auto shader_type = static_cast<ShaderType>(stage_index);
787 for (size_t index = 0; index < entry.size; ++index) {
788 const auto handle =
789 GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
790 const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
791 sampler_handles.push_back(sampler->Handle());
792 image_view_indices.push_back(handle.image);
793 }
794 }
795}
796
797void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
798 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
799 for (const auto& entry : kernel->GetEntries().samplers) {
800 for (size_t i = 0; i < entry.size; ++i) {
801 const auto handle =
802 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
803 const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
804 sampler_handles.push_back(sampler->Handle());
805 image_view_indices.push_back(handle.image);
806 }
807 }
808}
809
810void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
811 const bool via_header_index =
812 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
813 for (const auto& entry : shader->GetEntries().images) {
814 const auto shader_type = static_cast<ShaderType>(stage_index);
815 const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
816 image_view_indices.push_back(handle.image);
817 }
818}
819
820void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
821 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
822 for (const auto& entry : shader->GetEntries().images) {
823 const auto handle =
824 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
825 image_view_indices.push_back(handle.image);
826 }
827}
828
829void RasterizerOpenGL::SyncState() { 510void RasterizerOpenGL::SyncState() {
830 SyncViewport(); 511 SyncViewport();
831 SyncRasterizeEnable(); 512 SyncRasterizeEnable();
@@ -941,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() {
941 622
942void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { 623void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
943 auto& flags = maxwell3d.dirty.flags; 624 auto& flags = maxwell3d.dirty.flags;
944 if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { 625 if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
945 return; 626 return;
946 } 627 }
947 flags[Dirty::ClipDistances] = false; 628 flags[Dirty::ClipDistances] = false;
@@ -1318,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
1318 oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); 999 oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
1319} 1000}
1320 1001
1321void RasterizerOpenGL::SyncTransformFeedback() { 1002void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
1322 // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
1323 // when this is required.
1324 const auto& regs = maxwell3d.regs;
1325
1326 static constexpr std::size_t STRIDE = 3;
1327 std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
1328 std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
1329
1330 GLint* cursor = attribs.data();
1331 GLint* current_stream = streams.data();
1332
1333 for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
1334 const auto& layout = regs.tfb_layouts[feedback];
1335 UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
1336 if (layout.varying_count == 0) {
1337 continue;
1338 }
1339
1340 *current_stream = static_cast<GLint>(feedback);
1341 if (current_stream != streams.data()) {
1342 // When stepping one stream, push the expected token
1343 cursor[0] = GL_NEXT_BUFFER_NV;
1344 cursor[1] = 0;
1345 cursor[2] = 0;
1346 cursor += STRIDE;
1347 }
1348 ++current_stream;
1349
1350 const auto& locations = regs.tfb_varying_locs[feedback];
1351 std::optional<u8> current_index;
1352 for (u32 offset = 0; offset < layout.varying_count; ++offset) {
1353 const u8 location = locations[offset];
1354 const u8 index = location / 4;
1355
1356 if (current_index == index) {
1357 // Increase number of components of the previous attachment
1358 ++cursor[-2];
1359 continue;
1360 }
1361 current_index = index;
1362
1363 std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
1364 cursor[1] = 1;
1365 cursor += STRIDE;
1366 }
1367 }
1368
1369 const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
1370 const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
1371 glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
1372 GL_INTERLEAVED_ATTRIBS);
1373}
1374
1375void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1376 const auto& regs = maxwell3d.regs; 1003 const auto& regs = maxwell3d.regs;
1377 if (regs.tfb_enabled == 0) { 1004 if (regs.tfb_enabled == 0) {
1378 return; 1005 return;
1379 } 1006 }
1380 if (device.UseAssemblyShaders()) { 1007 program->ConfigureTransformFeedback();
1381 SyncTransformFeedback(); 1008
1382 }
1383 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 1009 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
1384 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 1010 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
1385 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); 1011 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1393,11 +1019,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1393} 1019}
1394 1020
1395void RasterizerOpenGL::EndTransformFeedback() { 1021void RasterizerOpenGL::EndTransformFeedback() {
1396 const auto& regs = maxwell3d.regs; 1022 if (maxwell3d.regs.tfb_enabled != 0) {
1397 if (regs.tfb_enabled == 0) { 1023 glEndTransformFeedback();
1398 return;
1399 } 1024 }
1400 glEndTransformFeedback();
1401} 1025}
1402 1026
1403AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} 1027AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d30ad698f..d0397b745 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -28,11 +28,9 @@
28#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
29#include "video_core/renderer_opengl/gl_resource_manager.h" 29#include "video_core/renderer_opengl/gl_resource_manager.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 31#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state_tracker.h" 32#include "video_core/renderer_opengl/gl_state_tracker.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 33#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/shader/async_shaders.h"
36#include "video_core/textures/texture.h" 34#include "video_core/textures/texture.h"
37 35
38namespace Core::Memory { 36namespace Core::Memory {
@@ -81,7 +79,7 @@ public:
81 79
82 void Draw(bool is_indexed, bool is_instanced) override; 80 void Draw(bool is_indexed, bool is_instanced) override;
83 void Clear() override; 81 void Clear() override;
84 void DispatchCompute(GPUVAddr code_addr) override; 82 void DispatchCompute() override;
85 void ResetCounter(VideoCore::QueryType type) override; 83 void ResetCounter(VideoCore::QueryType type) override;
86 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 84 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
87 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 85 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -118,36 +116,11 @@ public:
118 return num_queued_commands > 0; 116 return num_queued_commands > 0;
119 } 117 }
120 118
121 VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
122 return async_shaders;
123 }
124
125 const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
126 return async_shaders;
127 }
128
129private: 119private:
130 static constexpr size_t MAX_TEXTURES = 192; 120 static constexpr size_t MAX_TEXTURES = 192;
131 static constexpr size_t MAX_IMAGES = 48; 121 static constexpr size_t MAX_IMAGES = 48;
132 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; 122 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
133 123
134 void BindComputeTextures(Shader* kernel);
135
136 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
137 size_t& image_view_index, size_t& texture_index, size_t& image_index);
138
139 /// Configures the current textures to use for the draw command.
140 void SetupDrawTextures(const Shader* shader, size_t stage_index);
141
142 /// Configures the textures used in a compute shader.
143 void SetupComputeTextures(const Shader* kernel);
144
145 /// Configures images in a graphics shader.
146 void SetupDrawImages(const Shader* shader, size_t stage_index);
147
148 /// Configures images in a compute shader.
149 void SetupComputeImages(const Shader* shader);
150
151 /// Syncs state to match guest's 124 /// Syncs state to match guest's
152 void SyncState(); 125 void SyncState();
153 126
@@ -220,18 +193,12 @@ private:
220 /// Syncs vertex instances to match the guest state 193 /// Syncs vertex instances to match the guest state
221 void SyncVertexInstances(); 194 void SyncVertexInstances();
222 195
223 /// Syncs transform feedback state to match guest state
224 /// @note Only valid on assembly shaders
225 void SyncTransformFeedback();
226
227 /// Begin a transform feedback 196 /// Begin a transform feedback
228 void BeginTransformFeedback(GLenum primitive_mode); 197 void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
229 198
230 /// End a transform feedback 199 /// End a transform feedback
231 void EndTransformFeedback(); 200 void EndTransformFeedback();
232 201
233 void SetupShaders(bool is_indexed);
234
235 Tegra::GPU& gpu; 202 Tegra::GPU& gpu;
236 Tegra::Engines::Maxwell3D& maxwell3d; 203 Tegra::Engines::Maxwell3D& maxwell3d;
237 Tegra::Engines::KeplerCompute& kepler_compute; 204 Tegra::Engines::KeplerCompute& kepler_compute;
@@ -246,13 +213,11 @@ private:
246 TextureCache texture_cache; 213 TextureCache texture_cache;
247 BufferCacheRuntime buffer_cache_runtime; 214 BufferCacheRuntime buffer_cache_runtime;
248 BufferCache buffer_cache; 215 BufferCache buffer_cache;
249 ShaderCacheOpenGL shader_cache; 216 ShaderCache shader_cache;
250 QueryCache query_cache; 217 QueryCache query_cache;
251 AccelerateDMA accelerate_dma; 218 AccelerateDMA accelerate_dma;
252 FenceManagerOpenGL fence_manager; 219 FenceManagerOpenGL fence_manager;
253 220
254 VideoCommon::Shader::AsyncShaders async_shaders;
255
256 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; 221 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
257 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; 222 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
258 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; 223 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
@@ -260,7 +225,8 @@ private:
260 std::array<GLuint, MAX_IMAGES> image_handles{}; 225 std::array<GLuint, MAX_IMAGES> image_handles{};
261 226
262 /// Number of commands queued to the OpenGL driver. Reset on flush. 227 size_t num_queued_commands = 0;
263 std::size_t num_queued_commands = 0; 228 size_t num_queued_commands = 0;
229 bool has_written_global_memory = false;
264 230
265 u32 last_clip_distance_mask = 0; 231 u32 last_clip_distance_mask = 0;
266}; 232};
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 3428e5e21..8695c29e3 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -83,18 +83,6 @@ void OGLSampler::Release() {
83 handle = 0; 83 handle = 0;
84} 84}
85 85
86void OGLShader::Create(std::string_view source, GLenum type) {
87 if (handle != 0) {
88 return;
89 }
90 if (source.empty()) {
91 return;
92 }
93
94 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
95 handle = GLShader::LoadShader(source, type);
96}
97
98void OGLShader::Release() { 86void OGLShader::Release() {
99 if (handle == 0) 87 if (handle == 0)
100 return; 88 return;
@@ -104,21 +92,6 @@ void OGLShader::Release() {
104 handle = 0; 92 handle = 0;
105} 93}
106 94
107void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
108 const char* frag_shader, bool separable_program,
109 bool hint_retrievable) {
110 OGLShader vert, geo, frag;
111 if (vert_shader)
112 vert.Create(vert_shader, GL_VERTEX_SHADER);
113 if (geo_shader)
114 geo.Create(geo_shader, GL_GEOMETRY_SHADER);
115 if (frag_shader)
116 frag.Create(frag_shader, GL_FRAGMENT_SHADER);
117
118 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
119 Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
120}
121
122void OGLProgram::Release() { 95void OGLProgram::Release() {
123 if (handle == 0) 96 if (handle == 0)
124 return; 97 return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 552d79db4..b2d5bfd3b 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -8,7 +8,6 @@
8#include <utility> 8#include <utility>
9#include <glad/glad.h> 9#include <glad/glad.h>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/renderer_opengl/gl_shader_util.h"
12 11
13namespace OpenGL { 12namespace OpenGL {
14 13
@@ -128,8 +127,6 @@ public:
128 return *this; 127 return *this;
129 } 128 }
130 129
131 void Create(std::string_view source, GLenum type);
132
133 void Release(); 130 void Release();
134 131
135 GLuint handle = 0; 132 GLuint handle = 0;
@@ -151,17 +148,6 @@ public:
151 return *this; 148 return *this;
152 } 149 }
153 150
154 template <typename... T>
155 void Create(bool separable_program, bool hint_retrievable, T... shaders) {
156 if (handle != 0)
157 return;
158 handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
159 }
160
161 /// Creates a new internal OpenGL resource and stores the handle
162 void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
163 bool separable_program = false, bool hint_retrievable = false);
164
165 /// Deletes the internal OpenGL resource 151 /// Deletes the internal OpenGL resource
166 void Release(); 152 void Release();
167 153
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5a01c59ec..8d6cc074c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,606 +3,544 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <atomic> 5#include <atomic>
6#include <fstream>
6#include <functional> 7#include <functional>
7#include <mutex> 8#include <mutex>
8#include <optional>
9#include <string> 9#include <string>
10#include <thread> 10#include <thread>
11#include <unordered_set>
12 11
13#include "common/alignment.h" 12#include "common/alignment.h"
14#include "common/assert.h" 13#include "common/assert.h"
14#include "common/fs/fs.h"
15#include "common/fs/path_util.h"
15#include "common/logging/log.h" 16#include "common/logging/log.h"
16#include "common/scope_exit.h" 17#include "common/scope_exit.h"
18#include "common/settings.h"
19#include "common/thread_worker.h"
17#include "core/core.h" 20#include "core/core.h"
18#include "core/frontend/emu_window.h" 21#include "shader_recompiler/backend/glasm/emit_glasm.h"
22#include "shader_recompiler/backend/glsl/emit_glsl.h"
23#include "shader_recompiler/backend/spirv/emit_spirv.h"
24#include "shader_recompiler/frontend/ir/program.h"
25#include "shader_recompiler/frontend/maxwell/control_flow.h"
26#include "shader_recompiler/frontend/maxwell/translate_program.h"
27#include "shader_recompiler/profile.h"
19#include "video_core/engines/kepler_compute.h" 28#include "video_core/engines/kepler_compute.h"
20#include "video_core/engines/maxwell_3d.h" 29#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/shader_type.h"
22#include "video_core/memory_manager.h" 30#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_arb_decompiler.h"
24#include "video_core/renderer_opengl/gl_rasterizer.h" 31#include "video_core/renderer_opengl/gl_rasterizer.h"
25#include "video_core/renderer_opengl/gl_resource_manager.h" 32#include "video_core/renderer_opengl/gl_resource_manager.h"
26#include "video_core/renderer_opengl/gl_shader_cache.h" 33#include "video_core/renderer_opengl/gl_shader_cache.h"
27#include "video_core/renderer_opengl/gl_shader_decompiler.h" 34#include "video_core/renderer_opengl/gl_shader_util.h"
28#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
29#include "video_core/renderer_opengl/gl_state_tracker.h" 35#include "video_core/renderer_opengl/gl_state_tracker.h"
30#include "video_core/shader/memory_util.h"
31#include "video_core/shader/registry.h"
32#include "video_core/shader/shader_ir.h"
33#include "video_core/shader_cache.h" 36#include "video_core/shader_cache.h"
37#include "video_core/shader_environment.h"
34#include "video_core/shader_notify.h" 38#include "video_core/shader_notify.h"
35 39
36namespace OpenGL { 40namespace OpenGL {
37
38using Tegra::Engines::ShaderType;
39using VideoCommon::Shader::GetShaderAddress;
40using VideoCommon::Shader::GetShaderCode;
41using VideoCommon::Shader::GetUniqueIdentifier;
42using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
43using VideoCommon::Shader::ProgramCode;
44using VideoCommon::Shader::Registry;
45using VideoCommon::Shader::ShaderIR;
46using VideoCommon::Shader::STAGE_MAIN_OFFSET;
47
48namespace { 41namespace {
49 42using Shader::Backend::GLASM::EmitGLASM;
50constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; 43using Shader::Backend::GLSL::EmitGLSL;
51 44using Shader::Backend::SPIRV::EmitSPIRV;
52/// Gets the shader type from a Maxwell program type 45using Shader::Maxwell::MergeDualVertexPrograms;
53constexpr GLenum GetGLShaderType(ShaderType shader_type) { 46using Shader::Maxwell::TranslateProgram;
54 switch (shader_type) { 47using VideoCommon::ComputeEnvironment;
55 case ShaderType::Vertex: 48using VideoCommon::FileEnvironment;
56 return GL_VERTEX_SHADER; 49using VideoCommon::GenericEnvironment;
57 case ShaderType::Geometry: 50using VideoCommon::GraphicsEnvironment;
58 return GL_GEOMETRY_SHADER; 51using VideoCommon::LoadPipelines;
59 case ShaderType::Fragment: 52using VideoCommon::SerializePipeline;
60 return GL_FRAGMENT_SHADER; 53using Context = ShaderContext::Context;
61 case ShaderType::Compute: 54
62 return GL_COMPUTE_SHADER; 55constexpr u32 CACHE_VERSION = 5;
63 default: 56
64 return GL_NONE; 57template <typename Container>
65 } 58auto MakeSpan(Container& container) {
59 return std::span(container.data(), container.size());
66} 60}
67 61
68constexpr const char* GetShaderTypeName(ShaderType shader_type) { 62Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
69 switch (shader_type) { 63 const Shader::IR::Program& program,
70 case ShaderType::Vertex: 64 const Shader::IR::Program* previous_program,
71 return "VS"; 65 bool glasm_use_storage_buffers, bool use_assembly_shaders) {
72 case ShaderType::TesselationControl: 66 Shader::RuntimeInfo info;
73 return "HS"; 67 if (previous_program) {
74 case ShaderType::TesselationEval: 68 info.previous_stage_stores = previous_program->info.stores;
75 return "DS"; 69 } else {
76 case ShaderType::Geometry: 70 // Mark all stores as available for vertex shaders
77 return "GS"; 71 info.previous_stage_stores.mask.set();
78 case ShaderType::Fragment: 72 }
79 return "FS"; 73 switch (program.stage) {
80 case ShaderType::Compute: 74 case Shader::Stage::VertexB:
81 return "CS"; 75 case Shader::Stage::Geometry:
82 } 76 if (!use_assembly_shaders && key.xfb_enabled != 0) {
83 return "UNK"; 77 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
78 }
79 break;
80 case Shader::Stage::TessellationEval:
81 info.tess_clockwise = key.tessellation_clockwise != 0;
82 info.tess_primitive = [&key] {
83 switch (key.tessellation_primitive) {
84 case Maxwell::TessellationPrimitive::Isolines:
85 return Shader::TessPrimitive::Isolines;
86 case Maxwell::TessellationPrimitive::Triangles:
87 return Shader::TessPrimitive::Triangles;
88 case Maxwell::TessellationPrimitive::Quads:
89 return Shader::TessPrimitive::Quads;
90 }
91 UNREACHABLE();
92 return Shader::TessPrimitive::Triangles;
93 }();
94 info.tess_spacing = [&] {
95 switch (key.tessellation_spacing) {
96 case Maxwell::TessellationSpacing::Equal:
97 return Shader::TessSpacing::Equal;
98 case Maxwell::TessellationSpacing::FractionalOdd:
99 return Shader::TessSpacing::FractionalOdd;
100 case Maxwell::TessellationSpacing::FractionalEven:
101 return Shader::TessSpacing::FractionalEven;
102 }
103 UNREACHABLE();
104 return Shader::TessSpacing::Equal;
105 }();
106 break;
107 case Shader::Stage::Fragment:
108 info.force_early_z = key.early_z != 0;
109 break;
110 default:
111 break;
112 }
113 switch (key.gs_input_topology) {
114 case Maxwell::PrimitiveTopology::Points:
115 info.input_topology = Shader::InputTopology::Points;
116 break;
117 case Maxwell::PrimitiveTopology::Lines:
118 case Maxwell::PrimitiveTopology::LineLoop:
119 case Maxwell::PrimitiveTopology::LineStrip:
120 info.input_topology = Shader::InputTopology::Lines;
121 break;
122 case Maxwell::PrimitiveTopology::Triangles:
123 case Maxwell::PrimitiveTopology::TriangleStrip:
124 case Maxwell::PrimitiveTopology::TriangleFan:
125 case Maxwell::PrimitiveTopology::Quads:
126 case Maxwell::PrimitiveTopology::QuadStrip:
127 case Maxwell::PrimitiveTopology::Polygon:
128 case Maxwell::PrimitiveTopology::Patches:
129 info.input_topology = Shader::InputTopology::Triangles;
130 break;
131 case Maxwell::PrimitiveTopology::LinesAdjacency:
132 case Maxwell::PrimitiveTopology::LineStripAdjacency:
133 info.input_topology = Shader::InputTopology::LinesAdjacency;
134 break;
135 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
136 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
137 info.input_topology = Shader::InputTopology::TrianglesAdjacency;
138 break;
139 }
140 info.glasm_use_storage_buffers = glasm_use_storage_buffers;
141 return info;
84} 142}
85 143
86constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { 144void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
87 switch (program_type) { 145 std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
88 case Maxwell::ShaderProgram::VertexA: 146 return VideoCommon::TransformFeedbackState::Layout{
89 case Maxwell::ShaderProgram::VertexB: 147 .stream = layout.stream,
90 return ShaderType::Vertex; 148 .varying_count = layout.varying_count,
91 case Maxwell::ShaderProgram::TesselationControl: 149 .stride = layout.stride,
92 return ShaderType::TesselationControl; 150 };
93 case Maxwell::ShaderProgram::TesselationEval: 151 });
94 return ShaderType::TesselationEval; 152 state.varyings = regs.tfb_varying_locs;
95 case Maxwell::ShaderProgram::Geometry:
96 return ShaderType::Geometry;
97 case Maxwell::ShaderProgram::Fragment:
98 return ShaderType::Fragment;
99 }
100 return {};
101} 153}
154} // Anonymous namespace
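MakeRuntimeInfo takes the previous stage's IR program so each stage knows which outputs its predecessor actually stores; passing nullptr (the vertex-shader case) marks every store as available. A hypothetical caller sketch, chaining the infos in pipeline order; only MakeRuntimeInfo itself is taken from the code above:

#include <vector>

std::vector<Shader::RuntimeInfo> MakeRuntimeInfosExample(
    const GraphicsPipelineKey& key, const std::vector<Shader::IR::Program>& programs,
    bool glasm_use_storage_buffers, bool use_assembly_shaders) {
    std::vector<Shader::RuntimeInfo> infos;
    const Shader::IR::Program* previous{nullptr};
    for (const Shader::IR::Program& program : programs) {
        infos.push_back(MakeRuntimeInfo(key, program, previous, glasm_use_storage_buffers,
                                        use_assembly_shaders));
        previous = &program;
    }
    return infos;
}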
102 155
103constexpr GLenum AssemblyEnum(ShaderType shader_type) { 156ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
104 switch (shader_type) { 157 Tegra::Engines::Maxwell3D& maxwell3d_,
105 case ShaderType::Vertex: 158 Tegra::Engines::KeplerCompute& kepler_compute_,
106 return GL_VERTEX_PROGRAM_NV; 159 Tegra::MemoryManager& gpu_memory_, const Device& device_,
107 case ShaderType::TesselationControl: 160 TextureCache& texture_cache_, BufferCache& buffer_cache_,
108 return GL_TESS_CONTROL_PROGRAM_NV; 161 ProgramManager& program_manager_, StateTracker& state_tracker_,
109 case ShaderType::TesselationEval: 162 VideoCore::ShaderNotify& shader_notify_)
110 return GL_TESS_EVALUATION_PROGRAM_NV; 163 : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
111 case ShaderType::Geometry: 164 emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
112 return GL_GEOMETRY_PROGRAM_NV; 165 buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
113 case ShaderType::Fragment: 166 shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
114 return GL_FRAGMENT_PROGRAM_NV; 167 profile{
115 case ShaderType::Compute: 168 .supported_spirv = 0x00010000,
116 return GL_COMPUTE_PROGRAM_NV; 169
170 .unified_descriptor_binding = false,
171 .support_descriptor_aliasing = false,
172 .support_int8 = false,
173 .support_int16 = false,
174 .support_int64 = device.HasShaderInt64(),
175 .support_vertex_instance_id = true,
176 .support_float_controls = false,
177 .support_separate_denorm_behavior = false,
178 .support_separate_rounding_mode = false,
179 .support_fp16_denorm_preserve = false,
180 .support_fp32_denorm_preserve = false,
181 .support_fp16_denorm_flush = false,
182 .support_fp32_denorm_flush = false,
183 .support_fp16_signed_zero_nan_preserve = false,
184 .support_fp32_signed_zero_nan_preserve = false,
185 .support_fp64_signed_zero_nan_preserve = false,
186 .support_explicit_workgroup_layout = false,
187 .support_vote = true,
188 .support_viewport_index_layer_non_geometry =
189 device.HasNvViewportArray2() || device.HasVertexViewportLayer(),
190 .support_viewport_mask = device.HasNvViewportArray2(),
191 .support_typeless_image_loads = device.HasImageLoadFormatted(),
192 .support_demote_to_helper_invocation = false,
193 .support_int64_atomics = false,
194 .support_derivative_control = device.HasDerivativeControl(),
195 .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
196 .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
197 .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
198 .support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
199 .support_gl_warp_intrinsics = false,
200 .support_gl_variable_aoffi = device.HasVariableAoffi(),
201 .support_gl_sparse_textures = device.HasSparseTexture2(),
202 .support_gl_derivative_control = device.HasDerivativeControl(),
203
204 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
205
206 .lower_left_origin_mode = true,
207 .need_declared_frag_colors = true,
208 .need_fastmath_off = device.NeedsFastmathOff(),
209
210 .has_broken_spirv_clamp = true,
211 .has_broken_unsigned_image_offsets = true,
212 .has_broken_signed_operations = true,
213 .has_broken_fp16_float_controls = false,
214 .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
215 .has_gl_precise_bug = device.HasPreciseBug(),
216 .ignore_nan_fp_comparisons = true,
217 .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
218 },
219 host_info{
220 .support_float16 = false,
221 .support_int64 = device.HasShaderInt64(),
222 } {
223 if (use_asynchronous_shaders) {
224 workers = CreateWorkers();
117 } 225 }
118 return {};
119} 226}
120 227
121std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { 228ShaderCache::~ShaderCache() = default;
122 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
123}
124 229
125std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { 230void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
126 const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; 231 const VideoCore::DiskResourceLoadCallback& callback) {
127 const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, 232 if (title_id == 0) {
128 entry.graphics_info, entry.compute_info}; 233 return;
129 auto registry = std::make_shared<Registry>(entry.type, info);
130 for (const auto& [address, value] : entry.keys) {
131 const auto [buffer, offset] = address;
132 registry->InsertKey(buffer, offset, value);
133 }
134 for (const auto& [offset, sampler] : entry.bound_samplers) {
135 registry->InsertBoundSampler(offset, sampler);
136 } 234 }
137 for (const auto& [key, sampler] : entry.bindless_samplers) { 235 const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
138 const auto [buffer, offset] = key; 236 const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
139 registry->InsertBindlessSampler(buffer, offset, sampler); 237 if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
238 LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories");
239 return;
140 } 240 }
141 return registry; 241 shader_cache_filename = base_dir / "opengl.bin";
142} 242
143 243 if (!workers) {
144std::unordered_set<GLenum> GetSupportedFormats() { 244 workers = CreateWorkers();
145 GLint num_formats; 245 }
146 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); 246 struct {
247 std::mutex mutex;
248 size_t total{};
249 size_t built{};
250 bool has_loaded{};
251 } state;
252
253 const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
254 ComputePipelineKey key;
255 file.read(reinterpret_cast<char*>(&key), sizeof(key));
256 workers->QueueWork(
257 [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
258 ctx->pools.ReleaseContents();
259 auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
260 std::lock_guard lock{state.mutex};
261 if (pipeline) {
262 compute_cache.emplace(key, std::move(pipeline));
263 }
264 ++state.built;
265 if (state.has_loaded) {
266 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
267 }
268 });
269 ++state.total;
270 }};
271 const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
272 GraphicsPipelineKey key;
273 file.read(reinterpret_cast<char*>(&key), sizeof(key));
274 workers->QueueWork(
275 [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable {
276 boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
277 for (auto& env : envs) {
278 env_ptrs.push_back(&env);
279 }
280 ctx->pools.ReleaseContents();
281 auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
282 std::lock_guard lock{state.mutex};
283 if (pipeline) {
284 graphics_cache.emplace(key, std::move(pipeline));
285 }
286 ++state.built;
287 if (state.has_loaded) {
288 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
289 }
290 });
291 ++state.total;
292 }};
293 LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics);
147 294
148 std::vector<GLint> formats(num_formats); 295 std::unique_lock lock{state.mutex};
149 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); 296 callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
297 state.has_loaded = true;
298 lock.unlock();
150 299
151 std::unordered_set<GLenum> supported_formats; 300 workers->WaitForRequests();
152 for (const GLint format : formats) { 301 if (!use_asynchronous_shaders) {
153 supported_formats.insert(static_cast<GLenum>(format)); 302 workers.reset();
154 } 303 }
155 return supported_formats;
156} 304}
157 305
158} // Anonymous namespace 306GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
159 307 if (!RefreshStages(graphics_key.unique_hashes)) {
160ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, 308 current_pipeline = nullptr;
161 const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { 309 return nullptr;
162 if (device.UseDriverCache()) { 310 }
163 // Ignore hint retrievable if we are using the driver cache 311 const auto& regs{maxwell3d.regs};
164 hint_retrievable = false; 312 graphics_key.raw = 0;
165 } 313 graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
166 const std::string shader_id = MakeShaderID(unique_identifier, shader_type); 314 graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
167 LOG_INFO(Render_OpenGL, "{}", shader_id); 315 ? regs.draw.topology.Value()
168 316 : Maxwell::PrimitiveTopology{});
169 auto program = std::make_shared<ProgramHandle>(); 317 graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value());
170 318 graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value());
171 if (device.UseAssemblyShaders()) { 319 graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
172 const std::string arb = 320 graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0);
173 DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); 321 if (graphics_key.xfb_enabled) {
174 322 SetXfbState(graphics_key.xfb_state, regs);
175 GLuint& arb_prog = program->assembly_program.handle; 323 }
176 324 if (current_pipeline && graphics_key == current_pipeline->Key()) {
177// Commented out functions signal OpenGL errors but are compatible with apitrace. 325 return BuiltPipeline(current_pipeline);
178// Use them only to capture and replay on apitrace. 326 }
179#if 0 327 return CurrentGraphicsPipelineSlowPath();
180 glGenProgramsNV(1, &arb_prog);
181 glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
182 reinterpret_cast<const GLubyte*>(arb.data()));
183#else
184 glGenProgramsARB(1, &arb_prog);
185 glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
186 static_cast<GLsizei>(arb.size()), arb.data());
187#endif
188 const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
189 if (err && *err) {
190 LOG_CRITICAL(Render_OpenGL, "{}", err);
191 LOG_INFO(Render_OpenGL, "\n{}", arb);
192 }
193 } else {
194 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
195 OGLShader shader;
196 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
197
198 program->source_program.Create(true, hint_retrievable, shader.handle);
199 }
200
201 return program;
202} 328}
203 329
204Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, 330GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() {
205 ProgramSharedPtr program_, bool is_built_) 331 const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
206 : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, 332 auto& pipeline{pair->second};
207 is_built{is_built_} { 333 if (is_new) {
208 handle = program->assembly_program.handle; 334 pipeline = CreateGraphicsPipeline();
209 if (handle == 0) {
210 handle = program->source_program.handle;
211 } 335 }
212 if (is_built) { 336 if (!pipeline) {
213 ASSERT(handle != 0); 337 return nullptr;
214 } 338 }
339 current_pipeline = pipeline.get();
340 return BuiltPipeline(current_pipeline);
215} 341}
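
CurrentGraphicsPipelineSlowPath above (and CurrentComputePipeline below) lean on std::unordered_map::try_emplace to fold the lookup and the insertion into a single hash probe: a default-constructed null entry is created when the key is new, and only in that case is the expensive build attempted. A minimal sketch of the pattern, with hypothetical Key and Pipeline types standing in for the real GraphicsPipelineKey and GraphicsPipeline:

#include <cstdint>
#include <memory>
#include <unordered_map>

struct Pipeline {};                                    // hypothetical compiled pipeline
using Key = std::uint64_t;                             // hypothetical hashable pipeline key

std::unique_ptr<Pipeline> BuildPipeline(Key) {         // hypothetical; may return nullptr on failure
    return std::make_unique<Pipeline>();
}

std::unordered_map<Key, std::unique_ptr<Pipeline>> cache;

Pipeline* Lookup(Key key) {
    const auto [it, is_new] = cache.try_emplace(key);  // find-or-insert with one hash
    auto& pipeline = it->second;
    if (is_new) {
        pipeline = BuildPipeline(key);                 // built only the first time the key is seen
    }
    // A failed build leaves a null entry behind, so a bad key is not retried on every draw.
    return pipeline.get();
}
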
216 342
217Shader::~Shader() = default; 343GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
218 344 if (pipeline->IsBuilt()) {
219GLuint Shader::GetHandle() const { 345 return pipeline;
220 DEBUG_ASSERT(registry->IsConsistent());
221 return handle;
222}
223
224bool Shader::IsBuilt() const {
225 return is_built;
226}
227
228void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
229 program->source_program = std::move(new_program);
230 handle = program->source_program.handle;
231 is_built = true;
232}
233
234void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
235 program->assembly_program = std::move(new_program);
236 handle = program->assembly_program.handle;
237 is_built = true;
238}
239
240std::unique_ptr<Shader> Shader::CreateStageFromMemory(
241 const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
242 ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
243 const auto shader_type = GetShaderType(program_type);
244
245 auto& gpu = params.gpu;
246 gpu.ShaderNotify().MarkSharderBuilding();
247
248 auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
249 if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
250 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
251 // TODO(Rodrigo): Handle VertexA shaders
252 // std::optional<ShaderIR> ir_b;
253 // if (!code_b.empty()) {
254 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
255 // }
256 auto program =
257 BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
258 ShaderDiskCacheEntry entry;
259 entry.type = shader_type;
260 entry.code = std::move(code);
261 entry.code_b = std::move(code_b);
262 entry.unique_identifier = params.unique_identifier;
263 entry.bound_buffer = registry->GetBoundBuffer();
264 entry.graphics_info = registry->GetGraphicsInfo();
265 entry.keys = registry->GetKeys();
266 entry.bound_samplers = registry->GetBoundSamplers();
267 entry.bindless_samplers = registry->GetBindlessSamplers();
268 params.disk_cache.SaveEntry(std::move(entry));
269
270 gpu.ShaderNotify().MarkShaderComplete();
271
272 return std::unique_ptr<Shader>(new Shader(std::move(registry),
273 MakeEntries(params.device, ir, shader_type),
274 std::move(program), true));
275 } else {
276 // Required for entries
277 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
278 auto entries = MakeEntries(params.device, ir, shader_type);
279
280 async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
281 std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
282 COMPILER_SETTINGS, *registry, cpu_addr);
283
284 auto program = std::make_shared<ProgramHandle>();
285 return std::unique_ptr<Shader>(
286 new Shader(std::move(registry), std::move(entries), std::move(program), false));
287 } 346 }
288} 347 if (!use_asynchronous_shaders) {
289 348 return pipeline;
290std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
291 ProgramCode code) {
292 auto& gpu = params.gpu;
293 gpu.ShaderNotify().MarkSharderBuilding();
294
295 auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
296 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
297 const u64 uid = params.unique_identifier;
298 auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
299
300 ShaderDiskCacheEntry entry;
301 entry.type = ShaderType::Compute;
302 entry.code = std::move(code);
303 entry.unique_identifier = uid;
304 entry.bound_buffer = registry->GetBoundBuffer();
305 entry.compute_info = registry->GetComputeInfo();
306 entry.keys = registry->GetKeys();
307 entry.bound_samplers = registry->GetBoundSamplers();
308 entry.bindless_samplers = registry->GetBindlessSamplers();
309 params.disk_cache.SaveEntry(std::move(entry));
310
311 gpu.ShaderNotify().MarkShaderComplete();
312
313 return std::unique_ptr<Shader>(new Shader(std::move(registry),
314 MakeEntries(params.device, ir, ShaderType::Compute),
315 std::move(program)));
316}
317
318std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
319 const PrecompiledShader& precompiled_shader) {
320 return std::unique_ptr<Shader>(new Shader(
321 precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
322}
323
324ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
325 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
326 Tegra::Engines::Maxwell3D& maxwell3d_,
327 Tegra::Engines::KeplerCompute& kepler_compute_,
328 Tegra::MemoryManager& gpu_memory_, const Device& device_)
329 : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
330 maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
331
332ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
333
334void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading,
335 const VideoCore::DiskResourceLoadCallback& callback) {
336 disk_cache.BindTitleID(title_id);
337 const std::optional transferable = disk_cache.LoadTransferable();
338
339 LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
340 transferable.has_value() ? transferable->size() : 0);
341
342 if (!transferable) {
343 return;
344 } 349 }
345 350 // If the game is using depth, we can assume it is not rendering one-time geometry
346 std::vector<ShaderDiskCachePrecompiled> gl_cache; 351 // (such as a texture-building pass), so this draw can safely be skipped while the pipeline builds.
347 if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { 352 if (maxwell3d.regs.zeta_enable) {
348 // Only load precompiled cache when we are not using assembly shaders 353 return nullptr;
349 gl_cache = disk_cache.LoadPrecompiled();
350 } 354 }
351 const auto supported_formats = GetSupportedFormats(); 355 // If the game is drawing with a small index count, we can assume these are full-screen quads.
352 356 // Usually these shaders are only used once to build textures, so we assume they
353 // Track if precompiled cache was altered during loading to know if we have to 357 // should not be built asynchronously.
354 // serialize the virtual precompiled cache file back to the hard drive 358 if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
355 bool precompiled_cache_altered = false; 359 return pipeline;
356
357 // Inform the frontend about shader build initialization
358 if (callback) {
359 callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
360 } 360 }
361 return nullptr;
362}
361 363
362 std::mutex mutex; 364ComputePipeline* ShaderCache::CurrentComputePipeline() {
363 std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex 365 const VideoCommon::ShaderInfo* const shader{ComputeShader()};
364 std::atomic_bool gl_cache_failed = false; 366 if (!shader) {
365 367 return nullptr;
366 const auto find_precompiled = [&gl_cache](u64 id) {
367 return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
368 };
369
370 const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
371 std::size_t end) {
372 const auto scope = context->Acquire();
373
374 for (std::size_t i = begin; i < end; ++i) {
375 if (stop_loading.stop_requested()) {
376 return;
377 }
378 const auto& entry = (*transferable)[i];
379 const u64 uid = entry.unique_identifier;
380 const auto it = find_precompiled(uid);
381 const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
382
383 const bool is_compute = entry.type == ShaderType::Compute;
384 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
385 auto registry = MakeRegistry(entry);
386 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
387
388 ProgramSharedPtr program;
389 if (precompiled_entry) {
390 // If the shader is precompiled, attempt to load it from the stored program binary
391 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
392 if (!program) {
393 gl_cache_failed = true;
394 }
395 }
396 if (!program) {
397 // Otherwise compile it from GLSL
398 program = BuildShader(device, entry.type, uid, ir, *registry, true);
399 }
400
401 PrecompiledShader shader;
402 shader.program = std::move(program);
403 shader.registry = std::move(registry);
404 shader.entries = MakeEntries(device, ir, entry.type);
405
406 std::scoped_lock lock{mutex};
407 if (callback) {
408 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
409 transferable->size());
410 }
411 runtime_cache.emplace(entry.unique_identifier, std::move(shader));
412 }
413 };
414
415 const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
416 const std::size_t bucket_size{transferable->size() / num_workers};
417 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
418 std::vector<std::thread> threads(num_workers);
419 for (std::size_t i = 0; i < num_workers; ++i) {
420 const bool is_last_worker = i + 1 == num_workers;
421 const std::size_t start{bucket_size * i};
422 const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
423
424 // On some platforms the shared context has to be created from the GUI thread
425 contexts[i] = emu_window.CreateSharedContext();
426 threads[i] = std::thread(worker, contexts[i].get(), start, end);
427 } 368 }
428 for (auto& thread : threads) { 369 const auto& qmd{kepler_compute.launch_description};
429 thread.join(); 370 const ComputePipelineKey key{
371 .unique_hash = shader->unique_hash,
372 .shared_memory_size = qmd.shared_alloc,
373 .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
374 };
375 const auto [pair, is_new]{compute_cache.try_emplace(key)};
376 auto& pipeline{pair->second};
377 if (!is_new) {
378 return pipeline.get();
430 } 379 }
380 pipeline = CreateComputePipeline(key, shader);
381 return pipeline.get();
382}
431 383
432 if (gl_cache_failed) { 384std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
433 // Invalidate the precompiled cache if a shader dumped shader was rejected 385 GraphicsEnvironments environments;
434 disk_cache.InvalidatePrecompiled(); 386 GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
435 precompiled_cache_altered = true;
436 return;
437 }
438 if (stop_loading.stop_requested()) {
439 return;
440 }
441 387
442 if (device.UseAssemblyShaders() || device.UseDriverCache()) { 388 main_pools.ReleaseContents();
443 // Don't store precompiled binaries for assembly shaders or when using the driver cache 389 auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(),
444 return; 390 use_asynchronous_shaders)};
391 if (!pipeline || shader_cache_filename.empty()) {
392 return pipeline;
445 } 393 }
446 394 boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> env_ptrs;
447 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw 395 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
448 // before precompiling them 396 if (graphics_key.unique_hashes[index] != 0) {
449 397 env_ptrs.push_back(&environments.envs[index]);
450 for (std::size_t i = 0; i < transferable->size(); ++i) {
451 const u64 id = (*transferable)[i].unique_identifier;
452 const auto it = find_precompiled(id);
453 if (it == gl_cache.end()) {
454 const GLuint program = runtime_cache.at(id).program->source_program.handle;
455 disk_cache.SavePrecompiled(id, program);
456 precompiled_cache_altered = true;
457 } 398 }
458 } 399 }
459 400 SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION);
460 if (precompiled_cache_altered) { 401 return pipeline;
461 disk_cache.SaveVirtualPrecompiledFile();
462 }
463}
464
465ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
466 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
467 const std::unordered_set<GLenum>& supported_formats) {
468 if (!supported_formats.contains(precompiled_entry.binary_format)) {
469 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
470 return {};
471 }
472
473 auto program = std::make_shared<ProgramHandle>();
474 GLuint& handle = program->source_program.handle;
475 handle = glCreateProgram();
476 glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
477 glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
478 static_cast<GLsizei>(precompiled_entry.binary.size()));
479
480 GLint link_status;
481 glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
482 if (link_status == GL_FALSE) {
483 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
484 return {};
485 }
486
487 return program;
488} 402}
489 403
490Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, 404std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
491 VideoCommon::Shader::AsyncShaders& async_shaders) { 405 ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
492 if (!maxwell3d.dirty.flags[Dirty::Shaders]) { 406 std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
493 auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; 407 LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
494 if (last_shader->IsBuilt()) { 408 size_t env_index{};
495 return last_shader; 409 u32 total_storage_buffers{};
410 std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
411 const bool uses_vertex_a{key.unique_hashes[0] != 0};
412 const bool uses_vertex_b{key.unique_hashes[1] != 0};
413 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
414 if (key.unique_hashes[index] == 0) {
415 continue;
496 } 416 }
497 } 417 Shader::Environment& env{*envs[env_index]};
418 ++env_index;
498 419
499 const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; 420 const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
421 Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
422 if (!uses_vertex_a || index != 1) {
423 // Normal path
424 programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
500 425
501 if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { 426 for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
502 auto completed_work = async_shaders.GetCompletedWork(); 427 total_storage_buffers += desc.count;
503 for (auto& work : completed_work) {
504 Shader* shader = TryGet(work.cpu_address);
505 gpu.ShaderNotify().MarkShaderComplete();
506 if (shader == nullptr) {
507 continue;
508 } 428 }
509 using namespace VideoCommon::Shader; 429 } else {
510 if (work.backend == AsyncShaders::Backend::OpenGL) { 430 // VertexB path when VertexA is present.
511 shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); 431 auto& program_va{programs[0]};
512 } else if (work.backend == AsyncShaders::Backend::GLASM) { 432 auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
513 shader->AsyncGLASMBuilt(std::move(work.program.glasm)); 433 for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
434 total_storage_buffers += desc.count;
514 } 435 }
515 436 programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
516 auto& registry = shader->GetRegistry();
517
518 ShaderDiskCacheEntry entry;
519 entry.type = work.shader_type;
520 entry.code = std::move(work.code);
521 entry.code_b = std::move(work.code_b);
522 entry.unique_identifier = work.uid;
523 entry.bound_buffer = registry.GetBoundBuffer();
524 entry.graphics_info = registry.GetGraphicsInfo();
525 entry.keys = registry.GetKeys();
526 entry.bound_samplers = registry.GetBoundSamplers();
527 entry.bindless_samplers = registry.GetBindlessSamplers();
528 disk_cache.SaveEntry(std::move(entry));
529 } 437 }
530 } 438 }
531 439 const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
532 // Look up shader in the cache based on address 440 const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
533 const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; 441
534 if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { 442 std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
535 return last_shaders[static_cast<std::size_t>(program)] = shader; 443
536 } 444 OGLProgram source_program;
537 445 std::array<std::string, 5> sources;
538 const u8* const host_ptr{gpu_memory.GetPointer(address)}; 446 std::array<std::vector<u32>, 5> sources_spirv;
539 447 Shader::Backend::Bindings binding;
540 // No shader found - create a new one 448 Shader::IR::Program* previous_program{};
541 ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; 449 const bool use_glasm{device.UseAssemblyShaders()};
542 ProgramCode code_b; 450 const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;
543 if (program == Maxwell::ShaderProgram::VertexA) { 451 for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) {
544 const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; 452 if (key.unique_hashes[index] == 0) {
545 const u8* host_ptr_b = gpu_memory.GetPointer(address_b); 453 continue;
546 code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); 454 }
547 } 455 UNIMPLEMENTED_IF(index == 0);
548 const std::size_t code_size = code.size() * sizeof(u64); 456
549 457 Shader::IR::Program& program{programs[index]};
550 const u64 unique_identifier = GetUniqueIdentifier( 458 const size_t stage_index{index - 1};
551 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 459 infos[stage_index] = &program.info;
552 460
553 const ShaderParameters params{gpu, maxwell3d, disk_cache, device, 461 const auto runtime_info{
554 *cpu_addr, host_ptr, unique_identifier}; 462 MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)};
555 463 switch (device.GetShaderBackend()) {
556 std::unique_ptr<Shader> shader; 464 case Settings::ShaderBackend::GLSL:
557 const auto found = runtime_cache.find(unique_identifier); 465 sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding);
558 if (found == runtime_cache.end()) { 466 break;
559 shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), 467 case Settings::ShaderBackend::GLASM:
560 async_shaders, cpu_addr.value_or(0)); 468 sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding);
561 } else { 469 break;
562 shader = Shader::CreateFromCache(params, found->second); 470 case Settings::ShaderBackend::SPIRV:
563 } 471 sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding);
564 472 break;
565 Shader* const result = shader.get(); 473 }
566 if (cpu_addr) { 474 previous_program = &program;
567 Register(std::move(shader), *cpu_addr, code_size);
568 } else {
569 null_shader = std::move(shader);
570 } 475 }
476 auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
477 return std::make_unique<GraphicsPipeline>(
478 device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
479 thread_worker, &shader_notify, sources, sources_spirv, infos, key);
571 480
572 return last_shaders[static_cast<std::size_t>(program)] = result; 481} catch (Shader::Exception& exception) {
482 LOG_ERROR(Render_OpenGL, "{}", exception.what());
483 return nullptr;
573} 484}
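
CreateGraphicsPipeline above is written as a function-try-block: the entire body, including the per-stage translation loop, is covered by the catch clause, so a Shader::Exception raised during decoding or emission is logged and turned into a null pipeline instead of unwinding into the rasterizer. Note also that when both VertexA and VertexB are present, the merged program lives in slot 1 and slot 0 is skipped, which is why the emission loop starts at first_index and maps hardware stages as index - 1. A minimal sketch of the error-handling shape only, with a hypothetical Translate helper:

#include <memory>
#include <stdexcept>

struct Pipeline {};

std::unique_ptr<Pipeline> Translate(int stage) {       // hypothetical; throws on bad input
    if (stage < 0) {
        throw std::runtime_error{"invalid stage"};
    }
    return std::make_unique<Pipeline>();
}

std::unique_ptr<Pipeline> Create(int stage) try {
    // The whole body is covered by the catch clause below, mirroring the
    // LOG_ERROR + nullptr fallback used by the shader cache.
    return Translate(stage);
} catch (const std::runtime_error&) {
    return nullptr;
}
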
574 485
575Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { 486std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
576 const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; 487 const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
577 488 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
578 if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { 489 const auto& qmd{kepler_compute.launch_description};
579 return kernel; 490 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
580 } 491 env.SetCachedSize(shader->size_bytes);
581 492
582 // No kernel found, create a new one 493 main_pools.ReleaseContents();
583 const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; 494 auto pipeline{CreateComputePipeline(main_pools, key, env)};
584 ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; 495 if (!pipeline || shader_cache_filename.empty()) {
585 const std::size_t code_size{code.size() * sizeof(u64)}; 496 return pipeline;
586 const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; 497 }
587 498 SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, shader_cache_filename,
588 const ShaderParameters params{gpu, kepler_compute, disk_cache, device, 499 CACHE_VERSION);
589 *cpu_addr, host_ptr, unique_identifier}; 500 return pipeline;
501}
590 502
591 std::unique_ptr<Shader> kernel; 503std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
592 const auto found = runtime_cache.find(unique_identifier); 504 ShaderContext::ShaderPools& pools, const ComputePipelineKey& key,
593 if (found == runtime_cache.end()) { 505 Shader::Environment& env) try {
594 kernel = Shader::CreateKernelFromMemory(params, std::move(code)); 506 LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
595 } else { 507
596 kernel = Shader::CreateFromCache(params, found->second); 508 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
597 } 509 auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
510
511 u32 num_storage_buffers{};
512 for (const auto& desc : program.info.storage_buffers_descriptors) {
513 num_storage_buffers += desc.count;
514 }
515 Shader::RuntimeInfo info;
516 info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
517
518 std::string code{};
519 std::vector<u32> code_spirv;
520 switch (device.GetShaderBackend()) {
521 case Settings::ShaderBackend::GLSL:
522 code = EmitGLSL(profile, program);
523 break;
524 case Settings::ShaderBackend::GLASM:
525 code = EmitGLASM(profile, info, program);
526 break;
527 case Settings::ShaderBackend::SPIRV:
528 code_spirv = EmitSPIRV(profile, program);
529 break;
530 }
531
532 return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
533 kepler_compute, program_manager, program.info, code,
534 code_spirv);
535} catch (Shader::Exception& exception) {
536 LOG_ERROR(Render_OpenGL, "{}", exception.what());
537 return nullptr;
538}
598 539
599 Shader* const result = kernel.get(); 540std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
600 if (cpu_addr) { 541 return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
601 Register(std::move(kernel), *cpu_addr, code_size); 542 "yuzu:ShaderBuilder",
602 } else { 543 [this] { return Context{emu_window}; });
603 null_kernel = std::move(kernel);
604 }
605 return result;
606} 544}
607 545
608} // namespace OpenGL 546} // namespace OpenGL
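
As a reading aid, LoadDiskResources above follows a small producer/consumer shape: parsing the cache file queues one build job per serialized pipeline, a mutex-guarded state block counts queued versus finished jobs, and per-job progress callbacks are suppressed until the parse pass has finished (state.has_loaded). A condensed, self-contained sketch of that flow under simplified assumptions (the "worker" here runs jobs inline, and the callback takes only built/total counts):

#include <cstddef>
#include <functional>
#include <mutex>
#include <vector>

void BuildPipeline(int) {}                                   // hypothetical expensive build
void QueueWork(const std::function<void()>& job) { job(); }  // hypothetical worker; runs inline
void WaitForAll() {}                                         // hypothetical; real code joins worker threads

void LoadAll(const std::vector<int>& serialized_keys,
             const std::function<void(std::size_t, std::size_t)>& callback) {
    struct {
        std::mutex mutex;
        std::size_t total{};
        std::size_t built{};
        bool has_loaded{};
    } state;
    for (const int key : serialized_keys) {
        QueueWork([key, &state, &callback] {
            BuildPipeline(key);
            std::scoped_lock lock{state.mutex};
            ++state.built;
            if (state.has_loaded) {                          // report progress only once totals are final
                callback(state.built, state.total);
            }
        });
        ++state.total;
    }
    std::unique_lock lock{state.mutex};
    callback(0, state.total);                                // first report carries the final total
    state.has_loaded = true;
    lock.unlock();
    WaitForAll();                                            // mirrors workers->WaitForRequests()
}
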
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b30308b6f..a34110b37 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,157 +5,93 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic> 8#include <filesystem>
9#include <bitset> 9#include <stop_token>
10#include <memory>
11#include <string>
12#include <tuple>
13#include <unordered_map> 10#include <unordered_map>
14#include <unordered_set>
15#include <vector>
16 11
17#include <glad/glad.h> 12#include <glad/glad.h>
18 13
19#include "common/common_types.h" 14#include "common/common_types.h"
20#include "video_core/engines/shader_type.h" 15#include "common/thread_worker.h"
21#include "video_core/renderer_opengl/gl_resource_manager.h" 16#include "shader_recompiler/frontend/ir/value.h"
22#include "video_core/renderer_opengl/gl_shader_decompiler.h" 17#include "shader_recompiler/host_translate_info.h"
23#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 18#include "shader_recompiler/object_pool.h"
24#include "video_core/shader/registry.h" 19#include "shader_recompiler/profile.h"
25#include "video_core/shader/shader_ir.h" 20#include "video_core/renderer_opengl/gl_compute_pipeline.h"
21#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
22#include "video_core/renderer_opengl/gl_shader_context.h"
26#include "video_core/shader_cache.h" 23#include "video_core/shader_cache.h"
27 24
28namespace Tegra { 25namespace Tegra {
29class MemoryManager; 26class MemoryManager;
30} 27}
31 28
32namespace Core::Frontend {
33class EmuWindow;
34}
35
36namespace VideoCommon::Shader {
37class AsyncShaders;
38}
39
40namespace OpenGL { 29namespace OpenGL {
41 30
42class Device; 31class Device;
32class ProgramManager;
43class RasterizerOpenGL; 33class RasterizerOpenGL;
34using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
44 35
45using Maxwell = Tegra::Engines::Maxwell3D::Regs; 36class ShaderCache : public VideoCommon::ShaderCache {
46
47struct ProgramHandle {
48 OGLProgram source_program;
49 OGLAssemblyProgram assembly_program;
50};
51using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
52
53struct PrecompiledShader {
54 ProgramSharedPtr program;
55 std::shared_ptr<VideoCommon::Shader::Registry> registry;
56 ShaderEntries entries;
57};
58
59struct ShaderParameters {
60 Tegra::GPU& gpu;
61 Tegra::Engines::ConstBufferEngineInterface& engine;
62 ShaderDiskCacheOpenGL& disk_cache;
63 const Device& device;
64 VAddr cpu_addr;
65 const u8* host_ptr;
66 u64 unique_identifier;
67};
68
69ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
70 u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
71 const VideoCommon::Shader::Registry& registry,
72 bool hint_retrievable = false);
73
74class Shader final {
75public: 37public:
76 ~Shader(); 38 explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
77 39 Tegra::Engines::Maxwell3D& maxwell3d_,
78 /// Gets the GL program handle for the shader 40 Tegra::Engines::KeplerCompute& kepler_compute_,
79 GLuint GetHandle() const; 41 Tegra::MemoryManager& gpu_memory_, const Device& device_,
80 42 TextureCache& texture_cache_, BufferCache& buffer_cache_,
81 bool IsBuilt() const; 43 ProgramManager& program_manager_, StateTracker& state_tracker_,
82 44 VideoCore::ShaderNotify& shader_notify_);
83 /// Gets the shader entries for the shader 45 ~ShaderCache();
84 const ShaderEntries& GetEntries() const {
85 return entries;
86 }
87
88 const VideoCommon::Shader::Registry& GetRegistry() const {
89 return *registry;
90 }
91
92 /// Mark a OpenGL shader as built
93 void AsyncOpenGLBuilt(OGLProgram new_program);
94 46
95 /// Mark a GLASM shader as built 47 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
96 void AsyncGLASMBuilt(OGLAssemblyProgram new_program); 48 const VideoCore::DiskResourceLoadCallback& callback);
97 49
98 static std::unique_ptr<Shader> CreateStageFromMemory( 50 [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
99 const ShaderParameters& params, Maxwell::ShaderProgram program_type,
100 ProgramCode program_code, ProgramCode program_code_b,
101 VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
102 51
103 static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, 52 [[nodiscard]] ComputePipeline* CurrentComputePipeline();
104 ProgramCode code);
105
106 static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
107 const PrecompiledShader& precompiled_shader);
108 53
109private: 54private:
110 explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, 55 GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
111 ProgramSharedPtr program, bool is_built_ = true);
112
113 std::shared_ptr<VideoCommon::Shader::Registry> registry;
114 ShaderEntries entries;
115 ProgramSharedPtr program;
116 GLuint handle = 0;
117 bool is_built{};
118};
119 56
120class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { 57 [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
121public:
122 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
123 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
124 Tegra::Engines::Maxwell3D& maxwell3d_,
125 Tegra::Engines::KeplerCompute& kepler_compute_,
126 Tegra::MemoryManager& gpu_memory_, const Device& device_);
127 ~ShaderCacheOpenGL() override;
128 58
129 /// Loads disk cache for the current game 59 std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
130 void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
131 const VideoCore::DiskResourceLoadCallback& callback);
132 60
133 /// Gets the current specified shader stage program 61 std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
134 Shader* GetStageProgram(Maxwell::ShaderProgram program, 62 ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
135 VideoCommon::Shader::AsyncShaders& async_shaders); 63 std::span<Shader::Environment* const> envs, bool build_in_parallel);
136 64
137 /// Gets a compute kernel at the passed address 65 std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
138 Shader* GetComputeKernel(GPUVAddr code_addr); 66 const VideoCommon::ShaderInfo* shader);
139 67
140private: 68 std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
141 ProgramSharedPtr GeneratePrecompiledProgram( 69 const ComputePipelineKey& key,
142 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, 70 Shader::Environment& env);
143 const std::unordered_set<GLenum>& supported_formats); 71
72 std::unique_ptr<ShaderWorker> CreateWorkers() const;
144 73
145 Core::Frontend::EmuWindow& emu_window; 74 Core::Frontend::EmuWindow& emu_window;
146 Tegra::GPU& gpu;
147 Tegra::MemoryManager& gpu_memory;
148 Tegra::Engines::Maxwell3D& maxwell3d;
149 Tegra::Engines::KeplerCompute& kepler_compute;
150 const Device& device; 75 const Device& device;
76 TextureCache& texture_cache;
77 BufferCache& buffer_cache;
78 ProgramManager& program_manager;
79 StateTracker& state_tracker;
80 VideoCore::ShaderNotify& shader_notify;
81 const bool use_asynchronous_shaders;
82
83 GraphicsPipelineKey graphics_key{};
84 GraphicsPipeline* current_pipeline{};
151 85
152 ShaderDiskCacheOpenGL disk_cache; 86 ShaderContext::ShaderPools main_pools;
153 std::unordered_map<u64, PrecompiledShader> runtime_cache; 87 std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
88 std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
154 89
155 std::unique_ptr<Shader> null_shader; 90 Shader::Profile profile;
156 std::unique_ptr<Shader> null_kernel; 91 Shader::HostTranslateInfo host_info;
157 92
158 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; 93 std::filesystem::path shader_cache_filename;
94 std::unique_ptr<ShaderWorker> workers;
159}; 95};
160 96
161} // namespace OpenGL 97} // namespace OpenGL
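
The replacement interface is deliberately small compared to the old Shader/ShaderCacheOpenGL pair: a caller asks for the pipeline matching the current register state and treats a null result as "skip this draw or dispatch", either because the guest shaders are invalid or because an asynchronous build has not finished yet. A hedged sketch of a caller with a hypothetical Draw wrapper; the real call sites live in gl_rasterizer.cpp:

// Hypothetical wrapper only; assumes the yuzu headers above are available.
void Draw(OpenGL::ShaderCache& shader_cache) {
    OpenGL::GraphicsPipeline* const pipeline = shader_cache.CurrentGraphicsPipeline();
    if (!pipeline) {
        // Invalid shaders, or the pipeline is still being built asynchronously.
        return;
    }
    // ... configure the pipeline and issue the draw ...
}
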
diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h
new file mode 100644
index 000000000..6ff34e5d6
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_context.h
@@ -0,0 +1,33 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/frontend/emu_window.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/maxwell/control_flow.h"
10
11namespace OpenGL::ShaderContext {
12struct ShaderPools {
13 void ReleaseContents() {
14 flow_block.ReleaseContents();
15 block.ReleaseContents();
16 inst.ReleaseContents();
17 }
18
19 Shader::ObjectPool<Shader::IR::Inst> inst;
20 Shader::ObjectPool<Shader::IR::Block> block;
21 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
22};
23
24struct Context {
25 explicit Context(Core::Frontend::EmuWindow& emu_window)
26 : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
27
28 std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
29 Core::Frontend::GraphicsContext::Scoped scoped;
30 ShaderPools pools;
31};
32
33} // namespace OpenGL::ShaderContext
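
Each Context is constructed on a shader-builder thread itself (see CreateWorkers in gl_shader_cache.cpp): the Scoped member makes the freshly created shared GL context current for the lifetime of that thread, and the per-thread ShaderPools are recycled with ReleaseContents between jobs rather than reallocated. An illustrative sketch, assuming the yuzu headers above are available and an EmuWindow reference is in scope:

#include <thread>

// Hypothetical illustration only; the real dispatch goes through Common::StatefulThreadWorker.
void BuildOnWorkerThread(Core::Frontend::EmuWindow& emu_window) {
    std::jthread worker{[&emu_window] {
        OpenGL::ShaderContext::Context ctx{emu_window};  // shared GL context becomes current here
        ctx.pools.ReleaseContents();                     // recycle pool storage before each job
        // ... translate and compile pipelines using ctx.pools ...
    }};
}
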
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
deleted file mode 100644
index 9c28498e8..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ /dev/null
@@ -1,2986 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <string>
7#include <string_view>
8#include <utility>
9#include <variant>
10#include <vector>
11
12#include <fmt/format.h>
13
14#include "common/alignment.h"
15#include "common/assert.h"
16#include "common/common_types.h"
17#include "common/div_ceil.h"
18#include "common/logging/log.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_device.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/shader/ast.h"
25#include "video_core/shader/node.h"
26#include "video_core/shader/shader_ir.h"
27#include "video_core/shader/transform_feedback.h"
28
29namespace OpenGL {
30
31namespace {
32
33using Tegra::Engines::ShaderType;
34using Tegra::Shader::Attribute;
35using Tegra::Shader::Header;
36using Tegra::Shader::IpaInterpMode;
37using Tegra::Shader::IpaMode;
38using Tegra::Shader::IpaSampleMode;
39using Tegra::Shader::PixelImap;
40using Tegra::Shader::Register;
41using Tegra::Shader::TextureType;
42
43using namespace VideoCommon::Shader;
44using namespace std::string_literals;
45
46using Maxwell = Tegra::Engines::Maxwell3D::Regs;
47using Operation = const OperationNode&;
48
49class ASTDecompiler;
50class ExprDecompiler;
51
52enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
53
54constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
55
56constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
57constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
58
59struct TextureOffset {};
60struct TextureDerivates {};
61using TextureArgument = std::pair<Type, Node>;
62using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
63
64constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
65constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
66
67constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint
69#define itof intBitsToFloat
70#define utof uintBitsToFloat
71
72bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
73 bvec2 is_nan1 = isnan(pair1);
74 bvec2 is_nan2 = isnan(pair2);
75 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
76}}
77
78const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
80)";
81
82class ShaderWriter final {
83public:
84 void AddExpression(std::string_view text) {
85 DEBUG_ASSERT(scope >= 0);
86 if (!text.empty()) {
87 AppendIndentation();
88 }
89 shader_source += text;
90 }
91
92 // Forwards all arguments directly to libfmt.
93 // Note that all formatting requirements for fmt must be
94 // obeyed when using this function. (e.g. {{ must be used when
95 // printing the character '{' is desired. Ditto for }} and '}',
96 // etc).
97 template <typename... Args>
98 void AddLine(std::string_view text, Args&&... args) {
99 AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...));
100 AddNewLine();
101 }
102
103 void AddNewLine() {
104 DEBUG_ASSERT(scope >= 0);
105 shader_source += '\n';
106 }
107
108 std::string GenerateTemporary() {
109 return fmt::format("tmp{}", temporary_index++);
110 }
111
112 std::string GetResult() {
113 return std::move(shader_source);
114 }
115
116 s32 scope = 0;
117
118private:
119 void AppendIndentation() {
120 shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
121 }
122
123 std::string shader_source;
124 u32 temporary_index = 1;
125};
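
Because AddLine forwards its format string straight to fmt, literal braces in the emitted GLSL must be escaped as {{ and }}, and indentation is driven by the public scope counter. A small hypothetical usage sketch of the writer declared above (not part of the original file):

std::string ExampleSource() {
    ShaderWriter code;
    code.AddLine("void main() {{");    // emits: void main() {
    ++code.scope;                      // following lines are indented by 4 spaces
    code.AddLine("float {} = {};", code.GenerateTemporary(), "utof(0u)");
    --code.scope;
    code.AddLine("}}");                // emits: }
    return code.GetResult();
}
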
126
127class Expression final {
128public:
129 Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
130 ASSERT(type != Type::Void);
131 }
132 Expression() : type{Type::Void} {}
133
134 Type GetType() const {
135 return type;
136 }
137
138 std::string GetCode() const {
139 return code;
140 }
141
142 void CheckVoid() const {
143 ASSERT(type == Type::Void);
144 }
145
146 std::string As(Type type_) const {
147 switch (type_) {
148 case Type::Bool:
149 return AsBool();
150 case Type::Bool2:
151 return AsBool2();
152 case Type::Float:
153 return AsFloat();
154 case Type::Int:
155 return AsInt();
156 case Type::Uint:
157 return AsUint();
158 case Type::HalfFloat:
159 return AsHalfFloat();
160 default:
161 UNREACHABLE_MSG("Invalid type");
162 return code;
163 }
164 }
165
166 std::string AsBool() const {
167 switch (type) {
168 case Type::Bool:
169 return code;
170 default:
171 UNREACHABLE_MSG("Incompatible types");
172 return code;
173 }
174 }
175
176 std::string AsBool2() const {
177 switch (type) {
178 case Type::Bool2:
179 return code;
180 default:
181 UNREACHABLE_MSG("Incompatible types");
182 return code;
183 }
184 }
185
186 std::string AsFloat() const {
187 switch (type) {
188 case Type::Float:
189 return code;
190 case Type::Uint:
191 return fmt::format("utof({})", code);
192 case Type::Int:
193 return fmt::format("itof({})", code);
194 case Type::HalfFloat:
195 return fmt::format("utof(packHalf2x16({}))", code);
196 default:
197 UNREACHABLE_MSG("Incompatible types");
198 return code;
199 }
200 }
201
202 std::string AsInt() const {
203 switch (type) {
204 case Type::Float:
205 return fmt::format("ftoi({})", code);
206 case Type::Uint:
207 return fmt::format("int({})", code);
208 case Type::Int:
209 return code;
210 case Type::HalfFloat:
211 return fmt::format("int(packHalf2x16({}))", code);
212 default:
213 UNREACHABLE_MSG("Incompatible types");
214 return code;
215 }
216 }
217
218 std::string AsUint() const {
219 switch (type) {
220 case Type::Float:
221 return fmt::format("ftou({})", code);
222 case Type::Uint:
223 return code;
224 case Type::Int:
225 return fmt::format("uint({})", code);
226 case Type::HalfFloat:
227 return fmt::format("packHalf2x16({})", code);
228 default:
229 UNREACHABLE_MSG("Incompatible types");
230 return code;
231 }
232 }
233
234 std::string AsHalfFloat() const {
235 switch (type) {
236 case Type::Float:
237 return fmt::format("unpackHalf2x16(ftou({}))", code);
238 case Type::Uint:
239 return fmt::format("unpackHalf2x16({})", code);
240 case Type::Int:
241 return fmt::format("unpackHalf2x16(int({}))", code);
242 case Type::HalfFloat:
243 return code;
244 default:
245 UNREACHABLE_MSG("Incompatible types");
246 return code;
247 }
248 }
249
250private:
251 std::string code;
252 Type type{};
253};
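
Expression pairs a GLSL snippet with its static type so call sites can request whichever representation they need and have the matching bit-cast helpers from COMMON_DECLARATIONS wrapped around it automatically. A hypothetical illustration of the conversions, reusing the class exactly as declared above (not part of the original file):

std::string ExpressionExample() {
    const Expression value{"r0", Type::Uint};
    // value.AsFloat()     -> "utof(r0)"
    // value.AsInt()       -> "int(r0)"
    // value.AsHalfFloat() -> "unpackHalf2x16(r0)"
    return value.AsFloat();
}
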
254
255const char* GetTypeString(Type type) {
256 switch (type) {
257 case Type::Bool:
258 return "bool";
259 case Type::Bool2:
260 return "bvec2";
261 case Type::Float:
262 return "float";
263 case Type::Int:
264 return "int";
265 case Type::Uint:
266 return "uint";
267 case Type::HalfFloat:
268 return "vec2";
269 default:
270 UNREACHABLE_MSG("Invalid type");
271 return "<invalid type>";
272 }
273}
274
275const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
276 switch (image_type) {
277 case Tegra::Shader::ImageType::Texture1D:
278 return "1D";
279 case Tegra::Shader::ImageType::TextureBuffer:
280 return "Buffer";
281 case Tegra::Shader::ImageType::Texture1DArray:
282 return "1DArray";
283 case Tegra::Shader::ImageType::Texture2D:
284 return "2D";
285 case Tegra::Shader::ImageType::Texture2DArray:
286 return "2DArray";
287 case Tegra::Shader::ImageType::Texture3D:
288 return "3D";
289 default:
290 UNREACHABLE();
291 return "1D";
292 }
293}
294
295/// Describes primitive behavior on geometry shaders
296std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
297 switch (topology) {
298 case Maxwell::PrimitiveTopology::Points:
299 return {"points", 1};
300 case Maxwell::PrimitiveTopology::Lines:
301 case Maxwell::PrimitiveTopology::LineStrip:
302 return {"lines", 2};
303 case Maxwell::PrimitiveTopology::LinesAdjacency:
304 case Maxwell::PrimitiveTopology::LineStripAdjacency:
305 return {"lines_adjacency", 4};
306 case Maxwell::PrimitiveTopology::Triangles:
307 case Maxwell::PrimitiveTopology::TriangleStrip:
308 case Maxwell::PrimitiveTopology::TriangleFan:
309 return {"triangles", 3};
310 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
311 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
312 return {"triangles_adjacency", 6};
313 default:
314 UNIMPLEMENTED_MSG("topology={}", topology);
315 return {"points", 1};
316 }
317}
318
319/// Generates code to use for a swizzle operation.
320constexpr const char* GetSwizzle(std::size_t element) {
321 constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
322 return swizzle.at(element);
323}
324
325constexpr const char* GetColorSwizzle(std::size_t element) {
326 constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
327 return swizzle.at(element);
328}
329
330/// Translate topology
331std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
332 switch (topology) {
333 case Tegra::Shader::OutputTopology::PointList:
334 return "points";
335 case Tegra::Shader::OutputTopology::LineStrip:
336 return "line_strip";
337 case Tegra::Shader::OutputTopology::TriangleStrip:
338 return "triangle_strip";
339 default:
340 UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
341 return "points";
342 }
343}
344
345/// Returns true if an object has to be treated as precise
346bool IsPrecise(Operation operand) {
347 const auto& meta{operand.GetMeta()};
348 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
349 return arithmetic->precise;
350 }
351 return false;
352}
353
354bool IsPrecise(const Node& node) {
355 if (const auto operation = std::get_if<OperationNode>(&*node)) {
356 return IsPrecise(*operation);
357 }
358 return false;
359}
360
361constexpr bool IsGenericAttribute(Attribute::Index index) {
362 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
363}
364
365constexpr bool IsLegacyTexCoord(Attribute::Index index) {
366 return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
367 static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
368}
369
370constexpr Attribute::Index ToGenericAttribute(u64 value) {
371 return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
372}
373
374constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
375 return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
376}
377
378u32 GetGenericAttributeIndex(Attribute::Index index) {
379 ASSERT(IsGenericAttribute(index));
380 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
381}
382
383constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
384 switch (stack) {
385 case MetaStackClass::Ssy:
386 return "ssy";
387 case MetaStackClass::Pbk:
388 return "pbk";
389 }
390 return {};
391}
392
393std::string FlowStackName(MetaStackClass stack) {
394 return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
395}
396
397std::string FlowStackTopName(MetaStackClass stack) {
398 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
399}
400
401struct GenericVaryingDescription {
402 std::string name;
403 u8 first_element = 0;
404 bool is_scalar = false;
405};
406
407class GLSLDecompiler final {
408public:
409 explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
410 ShaderType stage_, std::string_view identifier_,
411 std::string_view suffix_)
412 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
413 identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
414 if (stage != ShaderType::Compute) {
415 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
416 }
417 }
418
419 void Decompile() {
420 DeclareHeader();
421 DeclareVertex();
422 DeclareGeometry();
423 DeclareFragment();
424 DeclareCompute();
425 DeclareInputAttributes();
426 DeclareOutputAttributes();
427 DeclareImages();
428 DeclareSamplers();
429 DeclareGlobalMemory();
430 DeclareConstantBuffers();
431 DeclareLocalMemory();
432 DeclareRegisters();
433 DeclarePredicates();
434 DeclareInternalFlags();
435 DeclareCustomVariables();
436 DeclarePhysicalAttributeReader();
437
438 code.AddLine("void main() {{");
439 ++code.scope;
440
441 if (stage == ShaderType::Vertex) {
442 code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
443 }
444
445 if (ir.IsDecompiled()) {
446 DecompileAST();
447 } else {
448 DecompileBranchMode();
449 }
450
451 --code.scope;
452 code.AddLine("}}");
453 }
454
455 std::string GetResult() {
456 return code.GetResult();
457 }
458
459private:
460 friend class ASTDecompiler;
461 friend class ExprDecompiler;
462
463 void DecompileBranchMode() {
464 // VM's program counter
465 const auto first_address = ir.GetBasicBlocks().begin()->first;
466 code.AddLine("uint jmp_to = {}U;", first_address);
467
468 // TODO(Subv): Figure out the actual depth of the flow stack; for now it seems
469 // unlikely that shaders will use 20 nested SSYs and PBKs.
470 constexpr u32 FLOW_STACK_SIZE = 20;
471 if (!ir.IsFlowStackDisabled()) {
472 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
473 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
474 code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
475 }
476 }
477
478 code.AddLine("while (true) {{");
479 ++code.scope;
480
481 code.AddLine("switch (jmp_to) {{");
482
483 for (const auto& pair : ir.GetBasicBlocks()) {
484 const auto& [address, bb] = pair;
485 code.AddLine("case 0x{:X}U: {{", address);
486 ++code.scope;
487
488 VisitBlock(bb);
489
490 --code.scope;
491 code.AddLine("}}");
492 }
493
494 code.AddLine("default: return;");
495 code.AddLine("}}");
496
497 --code.scope;
498 code.AddLine("}}");
499 }
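    // Illustrative sketch only (the address and block body below are hypothetical): for a shader
    // whose first basic block starts at 0xA0, the GLSL emitted by this branch mode looks roughly
    // like
    //     uint jmp_to = 160U;
    //     uint ssy_flow_stack[20]; uint ssy_flow_stack_top = 0U;
    //     uint pbk_flow_stack[20]; uint pbk_flow_stack_top = 0U;
    //     while (true) {
    //         switch (jmp_to) {
    //         case 0xA0U: { /* translated basic block */ }
    //         default: return;
    //         }
    //     }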
500
501 void DecompileAST();
502
503 void DeclareHeader() {
504 if (!identifier.empty()) {
505 code.AddLine("// {}", identifier);
506 }
507 const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
508 code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
509 code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
510 if (device.HasShaderBallot()) {
511 code.AddLine("#extension GL_ARB_shader_ballot : require");
512 }
513 if (device.HasVertexViewportLayer()) {
514 code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
515 }
516 if (device.HasImageLoadFormatted()) {
517 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
518 }
519 if (device.HasTextureShadowLod()) {
520 code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
521 }
522 if (device.HasWarpIntrinsics()) {
523 code.AddLine("#extension GL_NV_gpu_shader5 : require");
524 code.AddLine("#extension GL_NV_shader_thread_group : require");
525 code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
526 }
527 // This pragma stops Nvidia's driver from over-optimizing math (probably with fp16
528 // operations) in places where we don't want it to.
529 // Thanks to Ryujinx for finding this workaround.
530 code.AddLine("#pragma optionNV(fastmath off)");
531
532 code.AddNewLine();
533
534 code.AddLine(COMMON_DECLARATIONS);
535 }
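    // As a rough illustration (the exact extension set depends on the device), the generated
    // preamble looks like:
    //     // <identifier>
    //     #version 440 core
    //     #extension GL_ARB_separate_shader_objects : enable
    //     #pragma optionNV(fastmath off)
    // followed by COMMON_DECLARATIONS.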
536
537 void DeclareVertex() {
538 if (stage != ShaderType::Vertex) {
539 return;
540 }
541
542 DeclareVertexRedeclarations();
543 }
544
545 void DeclareGeometry() {
546 if (stage != ShaderType::Geometry) {
547 return;
548 }
549
550 const auto& info = registry.GetGraphicsInfo();
551 const auto input_topology = info.primitive_topology;
552 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
553 max_input_vertices = max_vertices;
554 code.AddLine("layout ({}) in;", glsl_topology);
555
556 const auto topology = GetTopologyName(header.common3.output_topology);
557 const auto max_output_vertices = header.common4.max_output_vertices.Value();
558 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
559 code.AddNewLine();
560
561 code.AddLine("in gl_PerVertex {{");
562 ++code.scope;
563 code.AddLine("vec4 gl_Position;");
564 --code.scope;
565 code.AddLine("}} gl_in[];");
566
567 DeclareVertexRedeclarations();
568 }
569
570 void DeclareFragment() {
571 if (stage != ShaderType::Fragment) {
572 return;
573 }
574 if (ir.UsesLegacyVaryings()) {
575 code.AddLine("in gl_PerFragment {{");
576 ++code.scope;
577 code.AddLine("vec4 gl_TexCoord[8];");
578 code.AddLine("vec4 gl_Color;");
579 code.AddLine("vec4 gl_SecondaryColor;");
580 --code.scope;
581 code.AddLine("}};");
582 }
583
584 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
585 code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
586 }
587 }
588
589 void DeclareCompute() {
590 if (stage != ShaderType::Compute) {
591 return;
592 }
593 const auto& info = registry.GetComputeInfo();
594 if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
595 const u32 limit = device.GetMaxComputeSharedMemorySize();
596 if (size > limit) {
597 LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
598 size, limit);
599 size = limit;
600 }
601
602 code.AddLine("shared uint smem[{}];", size / 4);
603 code.AddNewLine();
604 }
605 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
606 info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
607 code.AddNewLine();
608 }
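    // For example, a compute shader with 1024 bytes of shared memory and an 8x8x1 workgroup
    // (both values illustrative) ends up with:
    //     shared uint smem[256];
    //     layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;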
609
610 void DeclareVertexRedeclarations() {
611 code.AddLine("out gl_PerVertex {{");
612 ++code.scope;
613
614 auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
615 if (!pos_xfb.empty()) {
616 pos_xfb = fmt::format("layout ({}) ", pos_xfb);
617 }
618 const char* pos_type =
619 FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
620 code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
621
622 for (const auto attribute : ir.GetOutputAttributes()) {
623 if (attribute == Attribute::Index::ClipDistances0123 ||
624 attribute == Attribute::Index::ClipDistances4567) {
625 code.AddLine("float gl_ClipDistance[];");
626 break;
627 }
628 }
629
630 if (stage != ShaderType::Geometry &&
631 (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
632 if (ir.UsesLayer()) {
633 code.AddLine("int gl_Layer;");
634 }
635 if (ir.UsesViewportIndex()) {
636 code.AddLine("int gl_ViewportIndex;");
637 }
638 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
639 !device.HasVertexViewportLayer()) {
640 LOG_ERROR(
641 Render_OpenGL,
642 "GL_ARB_shader_viewport_layer_array is not available but is required by a shader");
643 }
644
645 if (ir.UsesPointSize()) {
646 code.AddLine("float gl_PointSize;");
647 }
648
649 if (ir.UsesLegacyVaryings()) {
650 code.AddLine("vec4 gl_TexCoord[8];");
651 code.AddLine("vec4 gl_FrontColor;");
652 code.AddLine("vec4 gl_FrontSecondaryColor;");
653 code.AddLine("vec4 gl_BackColor;");
654 code.AddLine("vec4 gl_BackSecondaryColor;");
655 }
656
657 --code.scope;
658 code.AddLine("}};");
659 code.AddNewLine();
660
661 if (stage == ShaderType::Geometry) {
662 if (ir.UsesLayer()) {
663 code.AddLine("out int gl_Layer;");
664 }
665 if (ir.UsesViewportIndex()) {
666 code.AddLine("out int gl_ViewportIndex;");
667 }
668 }
669 code.AddNewLine();
670 }
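    // A typical redeclaration for a vertex shader without transform feedback on the position
    // (assuming the default vec4 position type) and with clip distances in use is roughly:
    //     out gl_PerVertex {
    //         vec4 gl_Position;
    //         float gl_ClipDistance[];
    //     };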
671
672 void DeclareRegisters() {
673 const auto& registers = ir.GetRegisters();
674 for (const u32 gpr : registers) {
675 code.AddLine("float {} = 0.0f;", GetRegister(gpr));
676 }
677 if (!registers.empty()) {
678 code.AddNewLine();
679 }
680 }
681
682 void DeclareCustomVariables() {
683 const u32 num_custom_variables = ir.GetNumCustomVariables();
684 for (u32 i = 0; i < num_custom_variables; ++i) {
685 code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
686 }
687 if (num_custom_variables > 0) {
688 code.AddNewLine();
689 }
690 }
691
692 void DeclarePredicates() {
693 const auto& predicates = ir.GetPredicates();
694 for (const auto pred : predicates) {
695 code.AddLine("bool {} = false;", GetPredicate(pred));
696 }
697 if (!predicates.empty()) {
698 code.AddNewLine();
699 }
700 }
701
702 void DeclareLocalMemory() {
703 u64 local_memory_size = 0;
704 if (stage == ShaderType::Compute) {
705 local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
706 } else {
707 local_memory_size = header.GetLocalMemorySize();
708 }
709 if (local_memory_size == 0) {
710 return;
711 }
712 const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
713 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
714 code.AddNewLine();
715 }
716
717 void DeclareInternalFlags() {
718 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
719 const auto flag_code = static_cast<InternalFlag>(flag);
720 code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
721 }
722 code.AddNewLine();
723 }
724
725 const char* GetInputFlags(PixelImap attribute) {
726 switch (attribute) {
727 case PixelImap::Perspective:
728 return "smooth";
729 case PixelImap::Constant:
730 return "flat";
731 case PixelImap::ScreenLinear:
732 return "noperspective";
733 case PixelImap::Unused:
734 break;
735 }
736 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
737 return {};
738 }
739
740 void DeclareInputAttributes() {
741 if (ir.HasPhysicalAttributes()) {
742 const u32 num_inputs{GetNumPhysicalInputAttributes()};
743 for (u32 i = 0; i < num_inputs; ++i) {
744 DeclareInputAttribute(ToGenericAttribute(i), true);
745 }
746 code.AddNewLine();
747 return;
748 }
749
750 const auto& attributes = ir.GetInputAttributes();
751 for (const auto index : attributes) {
752 if (IsGenericAttribute(index)) {
753 DeclareInputAttribute(index, false);
754 }
755 }
756 if (!attributes.empty()) {
757 code.AddNewLine();
758 }
759 }
760
761 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
762 const u32 location{GetGenericAttributeIndex(index)};
763
764 std::string name{GetGenericInputAttribute(index)};
765 if (stage == ShaderType::Geometry) {
766 name = "gs_" + name + "[]";
767 }
768
769 std::string suffix_;
770 if (stage == ShaderType::Fragment) {
771 const auto input_mode{header.ps.GetPixelImap(location)};
772 if (input_mode == PixelImap::Unused) {
773 return;
774 }
775 suffix_ = GetInputFlags(input_mode);
776 }
777
778 code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
779 }
780
781 void DeclareOutputAttributes() {
782 if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
783 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
784 DeclareOutputAttribute(ToGenericAttribute(i));
785 }
786 code.AddNewLine();
787 return;
788 }
789
790 const auto& attributes = ir.GetOutputAttributes();
791 for (const auto index : attributes) {
792 if (IsGenericAttribute(index)) {
793 DeclareOutputAttribute(index);
794 }
795 }
796 if (!attributes.empty()) {
797 code.AddNewLine();
798 }
799 }
800
801 std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
802 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
803 const auto it = transform_feedback.find(location);
804 if (it == transform_feedback.end()) {
805 return std::nullopt;
806 }
807 return it->second.components;
808 }
809
810 std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
811 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
812 const auto it = transform_feedback.find(location);
813 if (it == transform_feedback.end()) {
814 return {};
815 }
816
817 const VaryingTFB& tfb = it->second;
818 return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
819 tfb.offset, tfb.stride);
820 }
821
822 void DeclareOutputAttribute(Attribute::Index index) {
823 static constexpr std::string_view swizzle = "xyzw";
824 u8 element = 0;
825 while (element < 4) {
826 auto xfb = GetTransformFeedbackDecoration(index, element);
827 if (!xfb.empty()) {
828 xfb = fmt::format(", {}", xfb);
829 }
830 const std::size_t remainder = 4 - element;
831 const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
832 const char* const type = FLOAT_TYPES.at(num_components - 1);
833
834 const u32 location = GetGenericAttributeIndex(index);
835
836 GenericVaryingDescription description;
837 description.first_element = static_cast<u8>(element);
838 description.is_scalar = num_components == 1;
839 description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
840 if (element != 0 || num_components != 4) {
841 const std::string_view name_swizzle = swizzle.substr(element, num_components);
842 description.name = fmt::format("{}_{}", description.name, name_swizzle);
843 }
844 for (std::size_t i = 0; i < num_components; ++i) {
845 const u8 offset = static_cast<u8>(location * 4 + element + i);
846 varying_description.insert({offset, description});
847 }
848
849 code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
850 xfb, type, description.name);
851
852 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
853 }
854 }
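    // Illustrative example of this per-component splitting (the attribute name and xfb values
    // are hypothetical): a generic output whose first two components are captured by transform
    // feedback could be declared as
    //     layout (location = 0, component = 0, xfb_buffer = 0, xfb_offset = 0, xfb_stride = 16) out vec2 out_attr0_xy;
    //     layout (location = 0, component = 2) out vec2 out_attr0_zw;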
855
856 void DeclareConstantBuffers() {
857 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
858 for (const auto& [index, info] : ir.GetConstantBuffers()) {
859 const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
860 const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
861 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
862 GetConstBufferBlock(index));
863 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
864 code.AddLine("}};");
865 code.AddNewLine();
866 }
867 }
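    // For instance, a 64-byte direct const buffer at binding 5 (values illustrative, names as
    // produced by GetConstBufferBlock/GetConstBuffer) becomes:
    //     layout (std140, binding = 5) uniform <block name> {
    //         uvec4 <cbuf name>[4];
    //     };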
868
869 void DeclareGlobalMemory() {
870 u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
871 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
872 // Since we don't know how the shader will use this memory, hint the driver to disable as
873 // many optimizations as possible
874 std::string qualifier = "coherent volatile";
875 if (usage.is_read && !usage.is_written) {
876 qualifier += " readonly";
877 } else if (usage.is_written && !usage.is_read) {
878 qualifier += " writeonly";
879 }
880
881 code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
882 GetGlobalMemoryBlock(base));
883 code.AddLine(" uint {}[];", GetGlobalMemory(base));
884 code.AddLine("}};");
885 code.AddNewLine();
886 }
887 }
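    // e.g. a global memory region that the shader only reads (binding illustrative) is declared
    // as:
    //     layout (std430, binding = 3) coherent volatile readonly buffer <block name> {
    //         uint <array name>[];
    //     };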
888
889 void DeclareSamplers() {
890 u32 binding = device.GetBaseBindings(stage).sampler;
891 for (const auto& sampler : ir.GetSamplers()) {
892 const std::string name = GetSampler(sampler);
893 const std::string description = fmt::format("layout (binding = {}) uniform", binding);
894 binding += sampler.is_indexed ? sampler.size : 1;
895
896 std::string sampler_type = [&]() {
897 if (sampler.is_buffer) {
898 return "samplerBuffer";
899 }
900 switch (sampler.type) {
901 case TextureType::Texture1D:
902 return "sampler1D";
903 case TextureType::Texture2D:
904 return "sampler2D";
905 case TextureType::Texture3D:
906 return "sampler3D";
907 case TextureType::TextureCube:
908 return "samplerCube";
909 default:
910 UNREACHABLE();
911 return "sampler2D";
912 }
913 }();
914 if (sampler.is_array) {
915 sampler_type += "Array";
916 }
917 if (sampler.is_shadow) {
918 sampler_type += "Shadow";
919 }
920
921 if (!sampler.is_indexed) {
922 code.AddLine("{} {} {};", description, sampler_type, name);
923 } else {
924 code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size);
925 }
926 }
927 if (!ir.GetSamplers().empty()) {
928 code.AddNewLine();
929 }
930 }
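    // For example, a non-indexed 2D array shadow sampler at binding 7 (binding illustrative)
    // is declared as:
    //     layout (binding = 7) uniform sampler2DArrayShadow <sampler name>;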
931
932 void DeclarePhysicalAttributeReader() {
933 if (!ir.HasPhysicalAttributes()) {
934 return;
935 }
936 code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
937 ++code.scope;
938 code.AddLine("switch (physical_address) {{");
939
940 // Just declare generic attributes for now.
941 const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
942 for (u32 index = 0; index < num_attributes; ++index) {
943 const auto attribute{ToGenericAttribute(index)};
944 for (u32 element = 0; element < 4; ++element) {
945 constexpr u32 generic_base = 0x80;
946 constexpr u32 generic_stride = 16;
947 constexpr u32 element_stride = 4;
948 const u32 address{generic_base + index * generic_stride + element * element_stride};
949
950 const bool declared = stage != ShaderType::Fragment ||
951 header.ps.GetPixelImap(index) != PixelImap::Unused;
952 const std::string value =
953 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
954 code.AddLine("case 0x{:X}U: return {};", address, value);
955 }
956 }
957
958 code.AddLine("default: return 0;");
959
960 code.AddLine("}}");
961 --code.scope;
962 code.AddLine("}}");
963 code.AddNewLine();
964 }
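    // The generated helper follows the 0x80 base / 16-byte stride addressing above, roughly:
    //     float ReadPhysicalAttribute(uint physical_address) {
    //         switch (physical_address) {
    //         case 0x80U: return <attribute 0, element x>;
    //         case 0x84U: return <attribute 0, element y>;
    //         // ...
    //         default: return 0;
    //         }
    //     }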
965
966 void DeclareImages() {
967 u32 binding = device.GetBaseBindings(stage).image;
968 for (const auto& image : ir.GetImages()) {
969 std::string qualifier = "coherent volatile";
970 if (image.is_read && !image.is_written) {
971 qualifier += " readonly";
972 } else if (image.is_written && !image.is_read) {
973 qualifier += " writeonly";
974 }
975
976 const char* format = image.is_atomic ? "r32ui, " : "";
977 const char* type_declaration = GetImageTypeDeclaration(image.type);
978 code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
979 qualifier, type_declaration, GetImage(image));
980 }
981 if (!ir.GetImages().empty()) {
982 code.AddNewLine();
983 }
984 }
985
986 void VisitBlock(const NodeBlock& bb) {
987 for (const auto& node : bb) {
988 Visit(node).CheckVoid();
989 }
990 }
991
992 Expression Visit(const Node& node) {
993 if (const auto operation = std::get_if<OperationNode>(&*node)) {
994 if (const auto amend_index = operation->GetAmendIndex()) {
995 Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
996 }
997 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
998 if (operation_index >= operation_decompilers.size()) {
999 UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
1000 return {};
1001 }
1002 const auto decompiler = operation_decompilers[operation_index];
1003 if (decompiler == nullptr) {
1004 UNREACHABLE_MSG("Undefined operation: {}", operation_index);
1005 return {};
1006 }
1007 return (this->*decompiler)(*operation);
1008 }
1009
1010 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1011 const u32 index = gpr->GetIndex();
1012 if (index == Register::ZeroIndex) {
1013 return {"0U", Type::Uint};
1014 }
1015 return {GetRegister(index), Type::Float};
1016 }
1017
1018 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1019 const u32 index = cv->GetIndex();
1020 return {GetCustomVariable(index), Type::Float};
1021 }
1022
1023 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1024 const u32 value = immediate->GetValue();
1025 if (value < 10) {
1026 // For readability, avoid hex formatting for single-digit immediates
1027 return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
1028 }
1029 return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
1030 }
1031
1032 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1033 const auto value = [&]() -> std::string {
1034 switch (const auto index = predicate->GetIndex(); index) {
1035 case Tegra::Shader::Pred::UnusedIndex:
1036 return "true";
1037 case Tegra::Shader::Pred::NeverExecute:
1038 return "false";
1039 default:
1040 return GetPredicate(index);
1041 }
1042 }();
1043 if (predicate->IsNegated()) {
1044 return {fmt::format("!({})", value), Type::Bool};
1045 }
1046 return {value, Type::Bool};
1047 }
1048
1049 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1050 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
1051 "Physical attributes in geometry shaders are not implemented");
1052 if (abuf->IsPhysicalBuffer()) {
1053 return {fmt::format("ReadPhysicalAttribute({})",
1054 Visit(abuf->GetPhysicalAddress()).AsUint()),
1055 Type::Float};
1056 }
1057 return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
1058 }
1059
1060 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1061 const Node offset = cbuf->GetOffset();
1062
1063 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1064 // Direct access
1065 const u32 offset_imm = immediate->GetValue();
1066 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
1067 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1068 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1069 Type::Uint};
1070 }
1071
1072 // Indirect access
1073 const std::string final_offset = code.GenerateTemporary();
1074 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
1075
1076 if (!device.HasComponentIndexingBug()) {
1077 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
1078 final_offset, final_offset),
1079 Type::Uint};
1080 }
1081
1082 // AMD's proprietary GLSL compiler emits broken code for variable component access.
1083 // To bypass this driver bug, generate 4 ifs, one per component.
1084 const std::string pack = code.GenerateTemporary();
1085 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
1086 final_offset);
1087
1088 const std::string result = code.GenerateTemporary();
1089 code.AddLine("uint {};", result);
1090 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
1091 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
1092 GetSwizzle(swizzle));
1093 }
1094 return {result, Type::Uint};
1095 }
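    // On devices with the component indexing bug, the indirect path above emits GLSL of the
    // form (temporary names are generated by the code object):
    //     uint tmp_off = <offset expression> >> 2;
    //     uvec4 tmp_pack = <cbuf>[tmp_off >> 2];
    //     uint tmp_result;
    //     if ((tmp_off & 3) == 0) tmp_result = tmp_pack.x;
    //     if ((tmp_off & 3) == 1) tmp_result = tmp_pack.y;
    //     // ... and likewise for .z and .w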
1096
1097 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1098 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
1099 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
1100 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
1101 return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
1102 Type::Uint};
1103 }
1104
1105 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1106 return {
1107 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
1108 Type::Uint};
1109 }
1110
1111 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1112 return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
1113 }
1114
1115 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1116 return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
1117 }
1118
1119 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1120 if (const auto amend_index = conditional->GetAmendIndex()) {
1121 Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
1122 }
1123 // It's invalid to call conditional on nested nodes; use an operation instead
1124 code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
1125 ++code.scope;
1126
1127 VisitBlock(conditional->GetCode());
1128
1129 --code.scope;
1130 code.AddLine("}}");
1131 return {};
1132 }
1133
1134 if (const auto comment = std::get_if<CommentNode>(&*node)) {
1135 code.AddLine("// " + comment->GetText());
1136 return {};
1137 }
1138
1139 UNREACHABLE();
1140 return {};
1141 }
1142
1143 Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
1144 const auto GeometryPass = [&](std::string_view name) {
1145 if (stage == ShaderType::Geometry && buffer) {
1146 // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games
1147 // set a 0x80000000 index for those and the shader fails to build. Find out why
1148 // this happens and what its intent is.
1149 return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
1150 max_input_vertices.value());
1151 }
1152 return std::string(name);
1153 };
1154
1155 switch (attribute) {
1156 case Attribute::Index::Position:
1157 switch (stage) {
1158 case ShaderType::Geometry:
1159 return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
1160 GetSwizzle(element)),
1161 Type::Float};
1162 case ShaderType::Fragment:
1163 return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
1164 default:
1165 UNREACHABLE();
1166 return {"0", Type::Int};
1167 }
1168 case Attribute::Index::FrontColor:
1169 return {"gl_Color"s + GetSwizzle(element), Type::Float};
1170 case Attribute::Index::FrontSecondaryColor:
1171 return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
1172 case Attribute::Index::PointCoord:
1173 switch (element) {
1174 case 0:
1175 return {"gl_PointCoord.x", Type::Float};
1176 case 1:
1177 return {"gl_PointCoord.y", Type::Float};
1178 case 2:
1179 case 3:
1180 return {"0.0f", Type::Float};
1181 }
1182 UNREACHABLE();
1183 return {"0", Type::Int};
1184 case Attribute::Index::TessCoordInstanceIDVertexID:
1185 // TODO(Subv): Find out what the values are for the first two elements when inside a
1186 // vertex shader, and what the value of the fourth element is when inside a Tess Eval
1187 // shader.
1188 ASSERT(stage == ShaderType::Vertex);
1189 switch (element) {
1190 case 2:
1191 // Config pack's first value is instance_id.
1192 return {"gl_InstanceID", Type::Int};
1193 case 3:
1194 return {"gl_VertexID", Type::Int};
1195 }
1196 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1197 return {"0", Type::Int};
1198 case Attribute::Index::FrontFacing:
1199 // TODO(Subv): Find out what the values are for the other elements.
1200 ASSERT(stage == ShaderType::Fragment);
1201 switch (element) {
1202 case 3:
1203 return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
1204 }
1205 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
1206 return {"0", Type::Int};
1207 default:
1208 if (IsGenericAttribute(attribute)) {
1209 return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
1210 Type::Float};
1211 }
1212 if (IsLegacyTexCoord(attribute)) {
1213 UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
1214 return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
1215 GetSwizzle(element)),
1216 Type::Float};
1217 }
1218 break;
1219 }
1220 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
1221 return {"0", Type::Int};
1222 }
1223
1224 Expression ApplyPrecise(Operation operation, std::string value, Type type) {
1225 if (!IsPrecise(operation)) {
1226 return {std::move(value), type};
1227 }
1228 // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
1229 // be found in fragment shaders, so we disable precise there. There are vertex shaders that
1230 // also fail to build but nobody seems to care about those.
1231 // Note: Only bugged drivers will skip precise.
1232 const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
1233
1234 std::string temporary = code.GenerateTemporary();
1235 code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
1236 temporary, value);
1237 return {std::move(temporary), type};
1238 }
1239
1240 Expression VisitOperand(Operation operation, std::size_t operand_index) {
1241 const auto& operand = operation[operand_index];
1242 const bool parent_precise = IsPrecise(operation);
1243 const bool child_precise = IsPrecise(operand);
1244 const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
1245 if (!parent_precise || child_precise || child_trivial) {
1246 return Visit(operand);
1247 }
1248
1249 Expression value = Visit(operand);
1250 std::string temporary = code.GenerateTemporary();
1251 code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
1252 return {std::move(temporary), value.GetType()};
1253 }
1254
1255 std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
1256 const u32 element = abuf->GetElement();
1257 switch (const auto attribute = abuf->GetIndex()) {
1258 case Attribute::Index::Position:
1259 return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
1260 case Attribute::Index::LayerViewportPointSize:
1261 switch (element) {
1262 case 0:
1263 UNIMPLEMENTED();
1264 return std::nullopt;
1265 case 1:
1266 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1267 return std::nullopt;
1268 }
1269 return {{"gl_Layer", Type::Int}};
1270 case 2:
1271 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1272 return std::nullopt;
1273 }
1274 return {{"gl_ViewportIndex", Type::Int}};
1275 case 3:
1276 return {{"gl_PointSize", Type::Float}};
1277 }
1278 return std::nullopt;
1279 case Attribute::Index::FrontColor:
1280 return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
1281 case Attribute::Index::FrontSecondaryColor:
1282 return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
1283 case Attribute::Index::BackColor:
1284 return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
1285 case Attribute::Index::BackSecondaryColor:
1286 return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
1287 case Attribute::Index::ClipDistances0123:
1288 return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
1289 case Attribute::Index::ClipDistances4567:
1290 return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
1291 default:
1292 if (IsGenericAttribute(attribute)) {
1293 return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
1294 }
1295 if (IsLegacyTexCoord(attribute)) {
1296 return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
1297 GetSwizzle(element)),
1298 Type::Float}};
1299 }
1300 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
1301 return std::nullopt;
1302 }
1303 }
1304
1305 Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
1306 Type type_a) {
1307 std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
1308 return ApplyPrecise(operation, std::move(op_str), result_type);
1309 }
1310
1311 Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
1312 Type type_a, Type type_b) {
1313 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1314 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1315 std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
1316
1317 return ApplyPrecise(operation, std::move(op_str), result_type);
1318 }
1319
1320 Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
1321 Type type_a, Type type_b) {
1322 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1323 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1324 std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
1325
1326 return ApplyPrecise(operation, std::move(op_str), result_type);
1327 }
1328
1329 Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
1330 Type type_a, Type type_b, Type type_c) {
1331 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1332 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1333 const std::string op_c = VisitOperand(operation, 2).As(type_c);
1334 std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
1335
1336 return ApplyPrecise(operation, std::move(op_str), result_type);
1337 }
1338
1339 Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
1340 Type type_a, Type type_b, Type type_c, Type type_d) {
1341 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1342 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1343 const std::string op_c = VisitOperand(operation, 2).As(type_c);
1344 const std::string op_d = VisitOperand(operation, 3).As(type_d);
1345 std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
1346
1347 return ApplyPrecise(operation, std::move(op_str), result_type);
1348 }
1349
1350 std::string GenerateTexture(Operation operation, const std::string& function_suffix,
1351 const std::vector<TextureIR>& extras, bool separate_dc = false) {
1352 constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
1353
1354 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1355 ASSERT(meta);
1356
1357 const std::size_t count = operation.GetOperandsCount();
1358 const bool has_array = meta->sampler.is_array;
1359 const bool has_shadow = meta->sampler.is_shadow;
1360 const bool workaround_lod_array_shadow_as_grad =
1361 !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
1362 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
1363 meta->sampler.type == TextureType::TextureCube);
1364
1365 std::string expr = "texture";
1366
1367 if (workaround_lod_array_shadow_as_grad) {
1368 expr += "Grad";
1369 } else {
1370 expr += function_suffix;
1371 }
1372
1373 if (!meta->aoffi.empty()) {
1374 expr += "Offset";
1375 } else if (!meta->ptp.empty()) {
1376 expr += "Offsets";
1377 }
1378 if (!meta->sampler.is_indexed) {
1379 expr += '(' + GetSampler(meta->sampler) + ", ";
1380 } else {
1381 expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
1382 }
1383 expr += coord_constructors.at(count + (has_array ? 1 : 0) +
1384 (has_shadow && !separate_dc ? 1 : 0) - 1);
1385 expr += '(';
1386 for (std::size_t i = 0; i < count; ++i) {
1387 expr += Visit(operation[i]).AsFloat();
1388
1389 const std::size_t next = i + 1;
1390 if (next < count)
1391 expr += ", ";
1392 }
1393 if (has_array) {
1394 expr += ", float(" + Visit(meta->array).AsInt() + ')';
1395 }
1396 if (has_shadow) {
1397 if (separate_dc) {
1398 expr += "), " + Visit(meta->depth_compare).AsFloat();
1399 } else {
1400 expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
1401 }
1402 } else {
1403 expr += ')';
1404 }
1405
1406 if (workaround_lod_array_shadow_as_grad) {
1407 switch (meta->sampler.type) {
1408 case TextureType::Texture2D:
1409 return expr + ", vec2(0.0), vec2(0.0))";
1410 case TextureType::TextureCube:
1411 return expr + ", vec3(0.0), vec3(0.0))";
1412 default:
1413 UNREACHABLE();
1414 break;
1415 }
1416 }
1417
1418 for (const auto& variant : extras) {
1419 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
1420 expr += GenerateTextureArgument(*argument);
1421 } else if (std::holds_alternative<TextureOffset>(variant)) {
1422 if (!meta->aoffi.empty()) {
1423 expr += GenerateTextureAoffi(meta->aoffi);
1424 } else if (!meta->ptp.empty()) {
1425 expr += GenerateTexturePtp(meta->ptp);
1426 }
1427 } else if (std::holds_alternative<TextureDerivates>(variant)) {
1428 expr += GenerateTextureDerivates(meta->derivates);
1429 } else {
1430 UNREACHABLE();
1431 }
1432 }
1433
1434 return expr + ')';
1435 }
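    // For instance, a plain 2D sample with a bias extra and no offsets (names illustrative)
    // comes out as:
    //     texture(<sampler>, vec2(coord_x, coord_y), <bias>)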
1436
1437 std::string GenerateTextureArgument(const TextureArgument& argument) {
1438 const auto& [type, operand] = argument;
1439 if (operand == nullptr) {
1440 return {};
1441 }
1442
1443 std::string expr = ", ";
1444 switch (type) {
1445 case Type::Int:
1446 if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
1447 // Inline the value as an immediate integer in GLSL (some extra arguments are
1448 // required to be constant)
1449 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
1450 } else {
1451 expr += Visit(operand).AsInt();
1452 }
1453 break;
1454 case Type::Float:
1455 expr += Visit(operand).AsFloat();
1456 break;
1457 default: {
1458 const auto type_int = static_cast<u32>(type);
1459 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
1460 expr += '0';
1461 break;
1462 }
1463 }
1464 return expr;
1465 }
1466
1467 std::string ReadTextureOffset(const Node& value) {
1468 if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
1469 // Inline the value as an immediate integer in GLSL (AOFFI arguments are required
1470 // to be constant by the standard).
1471 return std::to_string(static_cast<s32>(immediate->GetValue()));
1472 } else if (device.HasVariableAoffi()) {
1473 // Avoid using variable AOFFI on unsupported devices.
1474 return Visit(value).AsInt();
1475 } else {
1476 // Insert 0 on devices not supporting variable AOFFI.
1477 return "0";
1478 }
1479 }
1480
1481 std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
1482 if (aoffi.empty()) {
1483 return {};
1484 }
1485 constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
1486 std::string expr = ", ";
1487 expr += coord_constructors.at(aoffi.size() - 1);
1488 expr += '(';
1489
1490 for (std::size_t index = 0; index < aoffi.size(); ++index) {
1491 expr += ReadTextureOffset(aoffi.at(index));
1492 if (index + 1 < aoffi.size()) {
1493 expr += ", ";
1494 }
1495 }
1496 expr += ')';
1497
1498 return expr;
1499 }
1500
1501 std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
1502 static constexpr std::size_t num_vectors = 4;
1503 ASSERT(ptp.size() == num_vectors * 2);
1504
1505 std::string expr = ", ivec2[](";
1506 for (std::size_t vector = 0; vector < num_vectors; ++vector) {
1507 const bool has_next = vector + 1 < num_vectors;
1508 expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
1509 ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
1510 }
1511 expr += ')';
1512 return expr;
1513 }
1514
1515 std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
1516 if (derivates.empty()) {
1517 return {};
1518 }
1519 constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
1520 std::string expr = ", ";
1521 const std::size_t components = derivates.size() / 2;
1522 std::string dx = coord_constructors.at(components - 1);
1523 std::string dy = coord_constructors.at(components - 1);
1524 dx += '(';
1525 dy += '(';
1526
1527 for (std::size_t index = 0; index < components; ++index) {
1528 const auto& operand_x{derivates.at(index * 2)};
1529 const auto& operand_y{derivates.at(index * 2 + 1)};
1530 dx += Visit(operand_x).AsFloat();
1531 dy += Visit(operand_y).AsFloat();
1532
1533 if (index + 1 < components) {
1534 dx += ", ";
1535 dy += ", ";
1536 }
1537 }
1538 dx += ')';
1539 dy += ')';
1540 expr += dx + ", " + dy;
1541
1542 return expr;
1543 }
1544
1545 std::string BuildIntegerCoordinates(Operation operation) {
1546 constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
1547 const std::size_t coords_count{operation.GetOperandsCount()};
1548 std::string expr = constructors.at(coords_count - 1);
1549 for (std::size_t i = 0; i < coords_count; ++i) {
1550 expr += VisitOperand(operation, i).AsInt();
1551 if (i + 1 < coords_count) {
1552 expr += ", ";
1553 }
1554 }
1555 expr += ')';
1556 return expr;
1557 }
1558
1559 std::string BuildImageValues(Operation operation) {
1560 constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
1561 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1562
1563 const std::size_t values_count{meta.values.size()};
1564 std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
1565 for (std::size_t i = 0; i < values_count; ++i) {
1566 expr += Visit(meta.values.at(i)).AsUint();
1567 if (i + 1 < values_count) {
1568 expr += ", ";
1569 }
1570 }
1571 expr += ')';
1572 return expr;
1573 }
1574
1575 Expression Assign(Operation operation) {
1576 const Node& dest = operation[0];
1577 const Node& src = operation[1];
1578
1579 Expression target;
1580 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1581 if (gpr->GetIndex() == Register::ZeroIndex) {
1582 // Writing to Register::ZeroIndex is a no-op, but we still have to visit the source
1583 // as it might have side effects.
1584 code.AddLine("{};", Visit(src).GetCode());
1585 return {};
1586 }
1587 target = {GetRegister(gpr->GetIndex()), Type::Float};
1588 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1589 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
1590 auto output = GetOutputAttribute(abuf);
1591 if (!output) {
1592 return {};
1593 }
1594 target = std::move(*output);
1595 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1596 target = {
1597 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
1598 Type::Uint};
1599 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1600 ASSERT(stage == ShaderType::Compute);
1601 target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
1602 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1603 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
1604 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
1605 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
1606 target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
1607 Type::Uint};
1608 } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
1609 target = {GetCustomVariable(cv->GetIndex()), Type::Float};
1610 } else {
1611 UNREACHABLE_MSG("Assign called without a proper target");
1612 }
1613
1614 code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
1615 return {};
1616 }
1617
1618 template <Type type>
1619 Expression Add(Operation operation) {
1620 return GenerateBinaryInfix(operation, "+", type, type, type);
1621 }
1622
1623 template <Type type>
1624 Expression Mul(Operation operation) {
1625 return GenerateBinaryInfix(operation, "*", type, type, type);
1626 }
1627
1628 template <Type type>
1629 Expression Div(Operation operation) {
1630 return GenerateBinaryInfix(operation, "/", type, type, type);
1631 }
1632
1633 template <Type type>
1634 Expression Fma(Operation operation) {
1635 return GenerateTernary(operation, "fma", type, type, type, type);
1636 }
1637
1638 template <Type type>
1639 Expression Negate(Operation operation) {
1640 return GenerateUnary(operation, "-", type, type);
1641 }
1642
1643 template <Type type>
1644 Expression Absolute(Operation operation) {
1645 return GenerateUnary(operation, "abs", type, type);
1646 }
1647
1648 Expression FClamp(Operation operation) {
1649 return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
1650 Type::Float);
1651 }
1652
1653 Expression FCastHalf0(Operation operation) {
1654 return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1655 }
1656
1657 Expression FCastHalf1(Operation operation) {
1658 return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1659 }
1660
1661 template <Type type>
1662 Expression Min(Operation operation) {
1663 return GenerateBinaryCall(operation, "min", type, type, type);
1664 }
1665
1666 template <Type type>
1667 Expression Max(Operation operation) {
1668 return GenerateBinaryCall(operation, "max", type, type, type);
1669 }
1670
1671 Expression Select(Operation operation) {
1672 const std::string condition = Visit(operation[0]).AsBool();
1673 const std::string true_case = Visit(operation[1]).AsUint();
1674 const std::string false_case = Visit(operation[2]).AsUint();
1675 std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
1676
1677 return ApplyPrecise(operation, std::move(op_str), Type::Uint);
1678 }
1679
1680 Expression FCos(Operation operation) {
1681 return GenerateUnary(operation, "cos", Type::Float, Type::Float);
1682 }
1683
1684 Expression FSin(Operation operation) {
1685 return GenerateUnary(operation, "sin", Type::Float, Type::Float);
1686 }
1687
1688 Expression FExp2(Operation operation) {
1689 return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
1690 }
1691
1692 Expression FLog2(Operation operation) {
1693 return GenerateUnary(operation, "log2", Type::Float, Type::Float);
1694 }
1695
1696 Expression FInverseSqrt(Operation operation) {
1697 return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
1698 }
1699
1700 Expression FSqrt(Operation operation) {
1701 return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
1702 }
1703
1704 Expression FRoundEven(Operation operation) {
1705 return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
1706 }
1707
1708 Expression FFloor(Operation operation) {
1709 return GenerateUnary(operation, "floor", Type::Float, Type::Float);
1710 }
1711
1712 Expression FCeil(Operation operation) {
1713 return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
1714 }
1715
1716 Expression FTrunc(Operation operation) {
1717 return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
1718 }
1719
1720 template <Type type>
1721 Expression FCastInteger(Operation operation) {
1722 return GenerateUnary(operation, "float", Type::Float, type);
1723 }
1724
1725 Expression FSwizzleAdd(Operation operation) {
1726 const std::string op_a = VisitOperand(operation, 0).AsFloat();
1727 const std::string op_b = VisitOperand(operation, 1).AsFloat();
1728
1729 if (!device.HasShaderBallot()) {
1730 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
1731 return {fmt::format("{} + {}", op_a, op_b), Type::Float};
1732 }
1733
1734 const std::string instr_mask = VisitOperand(operation, 2).AsUint();
1735 const std::string mask = code.GenerateTemporary();
1736 code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
1737 instr_mask);
1738
1739 const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
1740 const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
1741 return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
1742 Type::Float};
1743 }
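    // With ballot support this expands to roughly the following, where the modifier lookup
    // tables are assumed to come from COMMON_DECLARATIONS:
    //     uint tmp_mask = (<instruction mask> >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;
    //     ((<op a> * fswzadd_modifiers_a[tmp_mask]) + (<op b> * fswzadd_modifiers_b[tmp_mask]))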
1744
1745 Expression ICastFloat(Operation operation) {
1746 return GenerateUnary(operation, "int", Type::Int, Type::Float);
1747 }
1748
1749 Expression ICastUnsigned(Operation operation) {
1750 return GenerateUnary(operation, "int", Type::Int, Type::Uint);
1751 }
1752
1753 template <Type type>
1754 Expression LogicalShiftLeft(Operation operation) {
1755 return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
1756 }
1757
1758 Expression ILogicalShiftRight(Operation operation) {
1759 const std::string op_a = VisitOperand(operation, 0).AsUint();
1760 const std::string op_b = VisitOperand(operation, 1).AsUint();
1761 std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
1762
1763 return ApplyPrecise(operation, std::move(op_str), Type::Int);
1764 }
1765
1766 Expression IArithmeticShiftRight(Operation operation) {
1767 return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
1768 }
1769
1770 template <Type type>
1771 Expression BitwiseAnd(Operation operation) {
1772 return GenerateBinaryInfix(operation, "&", type, type, type);
1773 }
1774
1775 template <Type type>
1776 Expression BitwiseOr(Operation operation) {
1777 return GenerateBinaryInfix(operation, "|", type, type, type);
1778 }
1779
1780 template <Type type>
1781 Expression BitwiseXor(Operation operation) {
1782 return GenerateBinaryInfix(operation, "^", type, type, type);
1783 }
1784
1785 template <Type type>
1786 Expression BitwiseNot(Operation operation) {
1787 return GenerateUnary(operation, "~", type, type);
1788 }
1789
1790 Expression UCastFloat(Operation operation) {
1791 return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
1792 }
1793
1794 Expression UCastSigned(Operation operation) {
1795 return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
1796 }
1797
1798 Expression UShiftRight(Operation operation) {
1799 return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
1800 }
1801
1802 template <Type type>
1803 Expression BitfieldInsert(Operation operation) {
1804 return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
1805 Type::Int);
1806 }
1807
1808 template <Type type>
1809 Expression BitfieldExtract(Operation operation) {
1810 return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
1811 }
1812
1813 template <Type type>
1814 Expression BitCount(Operation operation) {
1815 return GenerateUnary(operation, "bitCount", type, type);
1816 }
1817
1818 template <Type type>
1819 Expression BitMSB(Operation operation) {
1820 return GenerateUnary(operation, "findMSB", type, type);
1821 }
1822
1823 Expression HNegate(Operation operation) {
1824 const auto GetNegate = [&](std::size_t index) {
1825 return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
1826 };
1827 return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
1828 GetNegate(1), GetNegate(2)),
1829 Type::HalfFloat};
1830 }
1831
1832 Expression HClamp(Operation operation) {
1833 const std::string value = VisitOperand(operation, 0).AsHalfFloat();
1834 const std::string min = VisitOperand(operation, 1).AsFloat();
1835 const std::string max = VisitOperand(operation, 2).AsFloat();
1836 std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
1837
1838 return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
1839 }
1840
1841 Expression HCastFloat(Operation operation) {
1842 return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
1843 Type::HalfFloat};
1844 }
1845
1846 Expression HUnpack(Operation operation) {
1847 Expression operand = VisitOperand(operation, 0);
1848 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1849 case Tegra::Shader::HalfType::H0_H1:
1850 return operand;
1851 case Tegra::Shader::HalfType::F32:
1852 return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
1853 case Tegra::Shader::HalfType::H0_H0:
1854 return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
1855 case Tegra::Shader::HalfType::H1_H1:
1856 return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
1857 }
1858 UNREACHABLE();
1859 return {"0", Type::Int};
1860 }
1861
1862 Expression HMergeF32(Operation operation) {
1863 return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1864 }
1865
1866 Expression HMergeH0(Operation operation) {
1867 const std::string dest = VisitOperand(operation, 0).AsUint();
1868 const std::string src = VisitOperand(operation, 1).AsUint();
1869 return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
1870 Type::HalfFloat};
1871 }
1872
1873 Expression HMergeH1(Operation operation) {
1874 const std::string dest = VisitOperand(operation, 0).AsUint();
1875 const std::string src = VisitOperand(operation, 1).AsUint();
1876 return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
1877 Type::HalfFloat};
1878 }
1879
1880 Expression HPack2(Operation operation) {
1881 return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
1882 VisitOperand(operation, 1).AsFloat()),
1883 Type::HalfFloat};
1884 }
1885
1886 template <const std::string_view& op, Type type, bool unordered = false>
1887 Expression Comparison(Operation operation) {
1888 static_assert(!unordered || type == Type::Float);
1889
1890 Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
1891
1892 if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
1893 // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's
1894 // and Nvidia's proprietary stacks. Manually force an ordered comparison.
1895 return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
1896 VisitOperand(operation, 0).AsFloat(),
1897 VisitOperand(operation, 1).AsFloat()),
1898 Type::Bool};
1899 }
1900 if constexpr (!unordered) {
1901 return expr;
1902 }
1903 // Unordered comparisons are always true for NaN operands.
1904 return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
1905 VisitOperand(operation, 0).AsFloat(),
1906 VisitOperand(operation, 1).AsFloat()),
1907 Type::Bool};
1908 }
1909
1910 Expression FOrdered(Operation operation) {
1911 return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
1912 VisitOperand(operation, 1).AsFloat()),
1913 Type::Bool};
1914 }
1915
1916 Expression FUnordered(Operation operation) {
1917 return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
1918 VisitOperand(operation, 1).AsFloat()),
1919 Type::Bool};
1920 }
1921
1922 Expression LogicalAddCarry(Operation operation) {
1923 const std::string carry = code.GenerateTemporary();
1924 code.AddLine("uint {};", carry);
1925 code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
1926 VisitOperand(operation, 1).AsUint(), carry);
1927 return {fmt::format("({} != 0)", carry), Type::Bool};
1928 }
1929
1930 Expression LogicalAssign(Operation operation) {
1931 const Node& dest = operation[0];
1932 const Node& src = operation[1];
1933
1934 std::string target;
1935
1936 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1937 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1938
1939 const auto index = pred->GetIndex();
1940 switch (index) {
1941 case Tegra::Shader::Pred::NeverExecute:
1942 case Tegra::Shader::Pred::UnusedIndex:
1943 // Writing to these predicates is a no-op
1944 return {};
1945 }
1946 target = GetPredicate(index);
1947 } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
1948 target = GetInternalFlag(flag->GetFlag());
1949 }
1950
1951 code.AddLine("{} = {};", target, Visit(src).AsBool());
1952 return {};
1953 }
1954
1955 Expression LogicalAnd(Operation operation) {
1956 return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
1957 }
1958
1959 Expression LogicalOr(Operation operation) {
1960 return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
1961 }
1962
1963 Expression LogicalXor(Operation operation) {
1964 return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
1965 }
1966
1967 Expression LogicalNegate(Operation operation) {
1968 return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
1969 }
1970
1971 Expression LogicalPick2(Operation operation) {
1972 return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
1973 VisitOperand(operation, 1).AsUint()),
1974 Type::Bool};
1975 }
1976
1977 Expression LogicalAnd2(Operation operation) {
1978 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1979 }
1980
1981 template <bool with_nan>
1982 Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
1983 Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
1984 Type::HalfFloat, Type::HalfFloat);
1985 if constexpr (!with_nan) {
1986 return comparison;
1987 }
1988 return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
1989 VisitOperand(operation, 0).AsHalfFloat(),
1990 VisitOperand(operation, 1).AsHalfFloat()),
1991 Type::Bool2};
1992 }
1993
1994 template <bool with_nan>
1995 Expression Logical2HLessThan(Operation operation) {
1996 return GenerateHalfComparison<with_nan>(operation, "lessThan");
1997 }
1998
1999 template <bool with_nan>
2000 Expression Logical2HEqual(Operation operation) {
2001 return GenerateHalfComparison<with_nan>(operation, "equal");
2002 }
2003
2004 template <bool with_nan>
2005 Expression Logical2HLessEqual(Operation operation) {
2006 return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
2007 }
2008
2009 template <bool with_nan>
2010 Expression Logical2HGreaterThan(Operation operation) {
2011 return GenerateHalfComparison<with_nan>(operation, "greaterThan");
2012 }
2013
2014 template <bool with_nan>
2015 Expression Logical2HNotEqual(Operation operation) {
2016 return GenerateHalfComparison<with_nan>(operation, "notEqual");
2017 }
2018
2019 template <bool with_nan>
2020 Expression Logical2HGreaterEqual(Operation operation) {
2021 return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
2022 }
2023
2024 Expression Texture(Operation operation) {
2025 const auto meta = std::get<MetaTexture>(operation.GetMeta());
2026 const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
2027 meta.sampler.is_array && meta.sampler.is_shadow;
2028 // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
2029 const std::vector<TextureIR> extras{
2030 TextureOffset{},
2031 TextureArgument{Type::Float, meta.bias},
2032 };
2033 std::string expr = GenerateTexture(operation, "", extras, separate_dc);
2034 if (meta.sampler.is_shadow) {
2035 expr = fmt::format("vec4({})", expr);
2036 }
2037 return {expr + GetSwizzle(meta.element), Type::Float};
2038 }
2039
2040 Expression TextureLod(Operation operation) {
2041 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2042 ASSERT(meta);
2043
2044 std::string expr{};
2045
2046 if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
2047 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
2048 meta->sampler.type == TextureType::TextureCube)) {
2049 LOG_ERROR(Render_OpenGL,
2050 "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
2051 expr = GenerateTexture(operation, "Lod", {});
2052 } else {
2053 expr = GenerateTexture(operation, "Lod",
2054 {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
2055 }
2056
2057 if (meta->sampler.is_shadow) {
2058 expr = "vec4(" + expr + ')';
2059 }
2060 return {expr + GetSwizzle(meta->element), Type::Float};
2061 }
2062
2063 Expression TextureGather(Operation operation) {
2064 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
2065
2066 const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
2067 const bool separate_dc = meta.sampler.is_shadow;
2068
2069 std::vector<TextureIR> ir_;
2070 if (meta.sampler.is_shadow) {
2071 ir_ = {TextureOffset{}};
2072 } else {
2073 ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
2074 }
2075 return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
2076 Type::Float};
2077 }
2078
2079 Expression TextureQueryDimensions(Operation operation) {
2080 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2081 ASSERT(meta);
2082
2083 const std::string sampler = GetSampler(meta->sampler);
2084 const std::string lod = VisitOperand(operation, 0).AsInt();
2085
2086 switch (meta->element) {
2087 case 0:
2088 case 1:
2089 return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
2090 Type::Int};
2091 case 3:
2092 return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
2093 }
2094 UNREACHABLE();
2095 return {"0", Type::Int};
2096 }
2097
2098 Expression TextureQueryLod(Operation operation) {
2099 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2100 ASSERT(meta);
2101
2102 if (meta->element < 2) {
2103 return {fmt::format("int(({} * vec2(256)){})",
2104 GenerateTexture(operation, "QueryLod", {}),
2105 GetSwizzle(meta->element)),
2106 Type::Int};
2107 }
2108 return {"0", Type::Int};
2109 }
2110
2111 Expression TexelFetch(Operation operation) {
2112 constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
2113 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2114 ASSERT(meta);
2115 UNIMPLEMENTED_IF(meta->sampler.is_array);
2116 const std::size_t count = operation.GetOperandsCount();
2117
2118 std::string expr = "texelFetch(";
2119 expr += GetSampler(meta->sampler);
2120 expr += ", ";
2121
2122 expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
2123 expr += '(';
2124 for (std::size_t i = 0; i < count; ++i) {
2125 if (i > 0) {
2126 expr += ", ";
2127 }
2128 expr += VisitOperand(operation, i).AsInt();
2129 }
2130 if (meta->array) {
2131 expr += ", ";
2132 expr += Visit(meta->array).AsInt();
2133 }
2134 expr += ')';
2135
2136 if (meta->lod && !meta->sampler.is_buffer) {
2137 expr += ", ";
2138 expr += Visit(meta->lod).AsInt();
2139 }
2140 expr += ')';
2141 expr += GetSwizzle(meta->element);
2142
2143 return {std::move(expr), Type::Float};
2144 }
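    // Illustrative sketch: for a 2D, non-array, non-buffer fetch with an explicit LOD, the
    // string assembled above comes out roughly as
    //     texelFetch(sampler0, ivec2(x, y), lod)
    // followed by a component swizzle from GetSwizzle(meta->element); "sampler0", "x", "y" and
    // "lod" are placeholders for the GetSampler()/VisitOperand()/Visit() results.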
2145
2146 Expression TextureGradient(Operation operation) {
2147 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
2148 std::string expr =
2149 GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
2150 return {std::move(expr) + GetSwizzle(meta.element), Type::Float};
2151 }
2152
2153 Expression ImageLoad(Operation operation) {
2154 if (!device.HasImageLoadFormatted()) {
2155 LOG_ERROR(Render_OpenGL,
2156 "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
2157 return {"0", Type::Int};
2158 }
2159
2160 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2161 return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
2162 BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
2163 Type::Uint};
2164 }
2165
2166 Expression ImageStore(Operation operation) {
2167 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2168 code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
2169 BuildIntegerCoordinates(operation), BuildImageValues(operation));
2170 return {};
2171 }
2172
2173 template <const std::string_view& opname>
2174 Expression AtomicImage(Operation operation) {
2175 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2176 ASSERT(meta.values.size() == 1);
2177
2178 return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
2179 BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()),
2180 Type::Uint};
2181 }
2182
2183 template <const std::string_view& opname, Type type>
2184 Expression Atomic(Operation operation) {
2185 if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
2186 UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
2187 return {};
2188 }
2189 return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
2190 Visit(operation[1]).AsUint()),
2191 Type::Uint};
2192 }
2193
2194 template <const std::string_view& opname, Type type>
2195 Expression Reduce(Operation operation) {
2196 code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
2197 return {};
2198 }
2199
2200 Expression Branch(Operation operation) {
2201 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
2202 UNIMPLEMENTED_IF(!target);
2203
2204 code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
2205 code.AddLine("break;");
2206 return {};
2207 }
2208
2209 Expression BranchIndirect(Operation operation) {
2210 const std::string op_a = VisitOperand(operation, 0).AsUint();
2211
2212 code.AddLine("jmp_to = {};", op_a);
2213 code.AddLine("break;");
2214 return {};
2215 }
2216
2217 Expression PushFlowStack(Operation operation) {
2218 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2219 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
2220 UNIMPLEMENTED_IF(!target);
2221
2222 code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
2223 target->GetValue());
2224 return {};
2225 }
2226
2227 Expression PopFlowStack(Operation operation) {
2228 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2229 code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
2230 code.AddLine("break;");
2231 return {};
2232 }
2233
2234 void PreExit() {
2235 if (stage != ShaderType::Fragment) {
2236 return;
2237 }
2238 const auto& used_registers = ir.GetRegisters();
2239 const auto SafeGetRegister = [&](u32 reg) -> Expression {
2240 // TODO(Rodrigo): Replace with contains once C++20 releases
2241 if (used_registers.find(reg) != used_registers.end()) {
2242 return {GetRegister(reg), Type::Float};
2243 }
2244 return {"0.0f", Type::Float};
2245 };
2246
2247 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
2248
2249 // Write the color outputs using the data in the shader registers, disabled
2250 // rendertargets/components are skipped in the register assignment.
2251 u32 current_reg = 0;
2252 for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
2253 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
2254 for (u32 component = 0; component < 4; ++component) {
2255 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
2256 code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
2257 SafeGetRegister(current_reg).AsFloat());
2258 ++current_reg;
2259 }
2260 }
2261 }
2262 if (header.ps.omap.depth) {
2263 // The depth output is always 2 registers after the last color output, and current_reg
2264 // already contains one past the last color register.
2265 code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
2266 }
2267 }
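    // Illustrative sketch of what the loop above emits, assuming an empty suffix and a fragment
    // shader that enables all four components of render target 0 plus the red component of
    // render target 1 (gpr0..gpr4 stand in for GetRegister(0..4)):
    //     frag_color0.r = gpr0;
    //     frag_color0.g = gpr1;
    //     frag_color0.b = gpr2;
    //     frag_color0.a = gpr3;
    //     frag_color1.r = gpr4;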
2268
2269 Expression Exit(Operation operation) {
2270 PreExit();
2271 code.AddLine("return;");
2272 return {};
2273 }
2274
2275 Expression Discard(Operation operation) {
2276 // Enclose "discard" in a conditional, so that GLSL compilation does not complain
2277 // about unexecuted instructions that may follow this.
2278 code.AddLine("if (true) {{");
2279 ++code.scope;
2280 code.AddLine("discard;");
2281 --code.scope;
2282 code.AddLine("}}");
2283 return {};
2284 }
2285
2286 Expression EmitVertex(Operation operation) {
2287 ASSERT_MSG(stage == ShaderType::Geometry,
2288 "EmitVertex is expected to be used in a geometry shader.");
2289 code.AddLine("EmitVertex();");
2290 return {};
2291 }
2292
2293 Expression EndPrimitive(Operation operation) {
2294 ASSERT_MSG(stage == ShaderType::Geometry,
2295 "EndPrimitive is expected to be used in a geometry shader.");
2296 code.AddLine("EndPrimitive();");
2297 return {};
2298 }
2299
2300 Expression InvocationId(Operation operation) {
2301 return {"gl_InvocationID", Type::Int};
2302 }
2303
2304 Expression YNegate(Operation operation) {
2305 // Y_NEGATE is mapped to this uniform value
2306 return {"gl_FrontMaterial.ambient.a", Type::Float};
2307 }
2308
2309 template <u32 element>
2310 Expression LocalInvocationId(Operation) {
2311 return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
2312 }
2313
2314 template <u32 element>
2315 Expression WorkGroupId(Operation) {
2316 return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
2317 }
2318
2319 Expression BallotThread(Operation operation) {
2320 const std::string value = VisitOperand(operation, 0).AsBool();
2321 if (!device.HasWarpIntrinsics()) {
2322 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2323 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
2324 // one.
2325 return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
2326 }
2327 return {fmt::format("ballotThreadNV({})", value), Type::Uint};
2328 }
2329
2330 Expression Vote(Operation operation, const char* func) {
2331 const std::string value = VisitOperand(operation, 0).AsBool();
2332 if (!device.HasWarpIntrinsics()) {
2333 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2334 // Stub with a warp size of one.
2335 return {value, Type::Bool};
2336 }
2337 return {fmt::format("{}({})", func, value), Type::Bool};
2338 }
2339
2340 Expression VoteAll(Operation operation) {
2341 return Vote(operation, "allThreadsNV");
2342 }
2343
2344 Expression VoteAny(Operation operation) {
2345 return Vote(operation, "anyThreadNV");
2346 }
2347
2348 Expression VoteEqual(Operation operation) {
2349 if (!device.HasWarpIntrinsics()) {
2350 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2351             // We must return true here since this is a stub for a theoretical warp size of 1.
2352 // This will always return an equal result across all votes.
2353 return {"true", Type::Bool};
2354 }
2355 return Vote(operation, "allThreadsEqualNV");
2356 }
2357
2358 Expression ThreadId(Operation operation) {
2359 if (!device.HasShaderBallot()) {
2360 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
2361 return {"0U", Type::Uint};
2362 }
2363 return {"gl_SubGroupInvocationARB", Type::Uint};
2364 }
2365
2366 template <const std::string_view& comparison>
2367 Expression ThreadMask(Operation) {
2368 if (device.HasWarpIntrinsics()) {
2369 return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
2370 }
2371 if (device.HasShaderBallot()) {
2372 return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
2373 }
2374 LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
2375 return {"0U", Type::Uint};
2376 }
2377
2378 Expression ShuffleIndexed(Operation operation) {
2379 std::string value = VisitOperand(operation, 0).AsFloat();
2380
2381 if (!device.HasShaderBallot()) {
2382 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
2383 return {std::move(value), Type::Float};
2384 }
2385
2386 const std::string index = VisitOperand(operation, 1).AsUint();
2387 return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
2388 }
2389
2390 Expression Barrier(Operation) {
2391 if (!ir.IsDecompiled()) {
2392 LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
2393 return {};
2394 }
2395 code.AddLine("barrier();");
2396 return {};
2397 }
2398
2399 Expression MemoryBarrierGroup(Operation) {
2400 code.AddLine("groupMemoryBarrier();");
2401 return {};
2402 }
2403
2404 Expression MemoryBarrierGlobal(Operation) {
2405 code.AddLine("memoryBarrier();");
2406 return {};
2407 }
2408
2409 struct Func final {
2410 Func() = delete;
2411 ~Func() = delete;
2412
2413 static constexpr std::string_view LessThan = "<";
2414 static constexpr std::string_view Equal = "==";
2415 static constexpr std::string_view LessEqual = "<=";
2416 static constexpr std::string_view GreaterThan = ">";
2417 static constexpr std::string_view NotEqual = "!=";
2418 static constexpr std::string_view GreaterEqual = ">=";
2419
2420 static constexpr std::string_view Eq = "Eq";
2421 static constexpr std::string_view Ge = "Ge";
2422 static constexpr std::string_view Gt = "Gt";
2423 static constexpr std::string_view Le = "Le";
2424 static constexpr std::string_view Lt = "Lt";
2425
2426 static constexpr std::string_view Add = "Add";
2427 static constexpr std::string_view Min = "Min";
2428 static constexpr std::string_view Max = "Max";
2429 static constexpr std::string_view And = "And";
2430 static constexpr std::string_view Or = "Or";
2431 static constexpr std::string_view Xor = "Xor";
2432 static constexpr std::string_view Exchange = "Exchange";
2433 };
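    // Note on the struct above: these names are static data members rather than local string
    // literals so that each one is an object with linkage, which lets it bind to the
    // `const std::string_view&` non-type template parameters of the Comparison, Atomic, Reduce,
    // AtomicImage and ThreadMask templates. A typical instantiation from the table below is
    //     &GLSLDecompiler::Atomic<Func::Add, Type::Uint>
    // which formats calls such as atomicAdd(<target>, <value>).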
2434
2435 static constexpr std::array operation_decompilers = {
2436 &GLSLDecompiler::Assign,
2437
2438 &GLSLDecompiler::Select,
2439
2440 &GLSLDecompiler::Add<Type::Float>,
2441 &GLSLDecompiler::Mul<Type::Float>,
2442 &GLSLDecompiler::Div<Type::Float>,
2443 &GLSLDecompiler::Fma<Type::Float>,
2444 &GLSLDecompiler::Negate<Type::Float>,
2445 &GLSLDecompiler::Absolute<Type::Float>,
2446 &GLSLDecompiler::FClamp,
2447 &GLSLDecompiler::FCastHalf0,
2448 &GLSLDecompiler::FCastHalf1,
2449 &GLSLDecompiler::Min<Type::Float>,
2450 &GLSLDecompiler::Max<Type::Float>,
2451 &GLSLDecompiler::FCos,
2452 &GLSLDecompiler::FSin,
2453 &GLSLDecompiler::FExp2,
2454 &GLSLDecompiler::FLog2,
2455 &GLSLDecompiler::FInverseSqrt,
2456 &GLSLDecompiler::FSqrt,
2457 &GLSLDecompiler::FRoundEven,
2458 &GLSLDecompiler::FFloor,
2459 &GLSLDecompiler::FCeil,
2460 &GLSLDecompiler::FTrunc,
2461 &GLSLDecompiler::FCastInteger<Type::Int>,
2462 &GLSLDecompiler::FCastInteger<Type::Uint>,
2463 &GLSLDecompiler::FSwizzleAdd,
2464
2465 &GLSLDecompiler::Add<Type::Int>,
2466 &GLSLDecompiler::Mul<Type::Int>,
2467 &GLSLDecompiler::Div<Type::Int>,
2468 &GLSLDecompiler::Negate<Type::Int>,
2469 &GLSLDecompiler::Absolute<Type::Int>,
2470 &GLSLDecompiler::Min<Type::Int>,
2471 &GLSLDecompiler::Max<Type::Int>,
2472
2473 &GLSLDecompiler::ICastFloat,
2474 &GLSLDecompiler::ICastUnsigned,
2475 &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
2476 &GLSLDecompiler::ILogicalShiftRight,
2477 &GLSLDecompiler::IArithmeticShiftRight,
2478 &GLSLDecompiler::BitwiseAnd<Type::Int>,
2479 &GLSLDecompiler::BitwiseOr<Type::Int>,
2480 &GLSLDecompiler::BitwiseXor<Type::Int>,
2481 &GLSLDecompiler::BitwiseNot<Type::Int>,
2482 &GLSLDecompiler::BitfieldInsert<Type::Int>,
2483 &GLSLDecompiler::BitfieldExtract<Type::Int>,
2484 &GLSLDecompiler::BitCount<Type::Int>,
2485 &GLSLDecompiler::BitMSB<Type::Int>,
2486
2487 &GLSLDecompiler::Add<Type::Uint>,
2488 &GLSLDecompiler::Mul<Type::Uint>,
2489 &GLSLDecompiler::Div<Type::Uint>,
2490 &GLSLDecompiler::Min<Type::Uint>,
2491 &GLSLDecompiler::Max<Type::Uint>,
2492 &GLSLDecompiler::UCastFloat,
2493 &GLSLDecompiler::UCastSigned,
2494 &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
2495 &GLSLDecompiler::UShiftRight,
2496 &GLSLDecompiler::UShiftRight,
2497 &GLSLDecompiler::BitwiseAnd<Type::Uint>,
2498 &GLSLDecompiler::BitwiseOr<Type::Uint>,
2499 &GLSLDecompiler::BitwiseXor<Type::Uint>,
2500 &GLSLDecompiler::BitwiseNot<Type::Uint>,
2501 &GLSLDecompiler::BitfieldInsert<Type::Uint>,
2502 &GLSLDecompiler::BitfieldExtract<Type::Uint>,
2503 &GLSLDecompiler::BitCount<Type::Uint>,
2504 &GLSLDecompiler::BitMSB<Type::Uint>,
2505
2506 &GLSLDecompiler::Add<Type::HalfFloat>,
2507 &GLSLDecompiler::Mul<Type::HalfFloat>,
2508 &GLSLDecompiler::Fma<Type::HalfFloat>,
2509 &GLSLDecompiler::Absolute<Type::HalfFloat>,
2510 &GLSLDecompiler::HNegate,
2511 &GLSLDecompiler::HClamp,
2512 &GLSLDecompiler::HCastFloat,
2513 &GLSLDecompiler::HUnpack,
2514 &GLSLDecompiler::HMergeF32,
2515 &GLSLDecompiler::HMergeH0,
2516 &GLSLDecompiler::HMergeH1,
2517 &GLSLDecompiler::HPack2,
2518
2519 &GLSLDecompiler::LogicalAssign,
2520 &GLSLDecompiler::LogicalAnd,
2521 &GLSLDecompiler::LogicalOr,
2522 &GLSLDecompiler::LogicalXor,
2523 &GLSLDecompiler::LogicalNegate,
2524 &GLSLDecompiler::LogicalPick2,
2525 &GLSLDecompiler::LogicalAnd2,
2526
2527 &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
2528 &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
2529 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
2530 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
2531 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
2532 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
2533 &GLSLDecompiler::FOrdered,
2534 &GLSLDecompiler::FUnordered,
2535 &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
2536 &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
2537 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
2538 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
2539 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
2540 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
2541
2542 &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
2543 &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
2544 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
2545 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
2546 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
2547 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
2548
2549 &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
2550 &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
2551 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
2552 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
2553 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
2554 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
2555
2556 &GLSLDecompiler::LogicalAddCarry,
2557
2558 &GLSLDecompiler::Logical2HLessThan<false>,
2559 &GLSLDecompiler::Logical2HEqual<false>,
2560 &GLSLDecompiler::Logical2HLessEqual<false>,
2561 &GLSLDecompiler::Logical2HGreaterThan<false>,
2562 &GLSLDecompiler::Logical2HNotEqual<false>,
2563 &GLSLDecompiler::Logical2HGreaterEqual<false>,
2564 &GLSLDecompiler::Logical2HLessThan<true>,
2565 &GLSLDecompiler::Logical2HEqual<true>,
2566 &GLSLDecompiler::Logical2HLessEqual<true>,
2567 &GLSLDecompiler::Logical2HGreaterThan<true>,
2568 &GLSLDecompiler::Logical2HNotEqual<true>,
2569 &GLSLDecompiler::Logical2HGreaterEqual<true>,
2570
2571 &GLSLDecompiler::Texture,
2572 &GLSLDecompiler::TextureLod,
2573 &GLSLDecompiler::TextureGather,
2574 &GLSLDecompiler::TextureQueryDimensions,
2575 &GLSLDecompiler::TextureQueryLod,
2576 &GLSLDecompiler::TexelFetch,
2577 &GLSLDecompiler::TextureGradient,
2578
2579 &GLSLDecompiler::ImageLoad,
2580 &GLSLDecompiler::ImageStore,
2581
2582 &GLSLDecompiler::AtomicImage<Func::Add>,
2583 &GLSLDecompiler::AtomicImage<Func::And>,
2584 &GLSLDecompiler::AtomicImage<Func::Or>,
2585 &GLSLDecompiler::AtomicImage<Func::Xor>,
2586 &GLSLDecompiler::AtomicImage<Func::Exchange>,
2587
2588 &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
2589 &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
2590 &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
2591 &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
2592 &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
2593 &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
2594 &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,
2595
2596 &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
2597 &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
2598 &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
2599 &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
2600 &GLSLDecompiler::Atomic<Func::And, Type::Int>,
2601 &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
2602 &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
2603
2604 &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
2605 &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
2606 &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
2607 &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
2608 &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
2609 &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
2610
2611 &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
2612 &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
2613 &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
2614 &GLSLDecompiler::Reduce<Func::And, Type::Int>,
2615 &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
2616 &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
2617
2618 &GLSLDecompiler::Branch,
2619 &GLSLDecompiler::BranchIndirect,
2620 &GLSLDecompiler::PushFlowStack,
2621 &GLSLDecompiler::PopFlowStack,
2622 &GLSLDecompiler::Exit,
2623 &GLSLDecompiler::Discard,
2624
2625 &GLSLDecompiler::EmitVertex,
2626 &GLSLDecompiler::EndPrimitive,
2627
2628 &GLSLDecompiler::InvocationId,
2629 &GLSLDecompiler::YNegate,
2630 &GLSLDecompiler::LocalInvocationId<0>,
2631 &GLSLDecompiler::LocalInvocationId<1>,
2632 &GLSLDecompiler::LocalInvocationId<2>,
2633 &GLSLDecompiler::WorkGroupId<0>,
2634 &GLSLDecompiler::WorkGroupId<1>,
2635 &GLSLDecompiler::WorkGroupId<2>,
2636
2637 &GLSLDecompiler::BallotThread,
2638 &GLSLDecompiler::VoteAll,
2639 &GLSLDecompiler::VoteAny,
2640 &GLSLDecompiler::VoteEqual,
2641
2642 &GLSLDecompiler::ThreadId,
2643 &GLSLDecompiler::ThreadMask<Func::Eq>,
2644 &GLSLDecompiler::ThreadMask<Func::Ge>,
2645 &GLSLDecompiler::ThreadMask<Func::Gt>,
2646 &GLSLDecompiler::ThreadMask<Func::Le>,
2647 &GLSLDecompiler::ThreadMask<Func::Lt>,
2648 &GLSLDecompiler::ShuffleIndexed,
2649
2650 &GLSLDecompiler::Barrier,
2651 &GLSLDecompiler::MemoryBarrierGroup,
2652 &GLSLDecompiler::MemoryBarrierGlobal,
2653 };
2654 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
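    // Dispatch sketch (the visitor that consumes this table lives earlier in the file; the exact
    // accessor names below are assumptions, not taken from the original source):
    //     const auto handler = operation_decompilers[static_cast<std::size_t>(operation.GetCode())];
    //     const Expression result = (this->*handler)(operation);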
2655
2656 std::string GetRegister(u32 index) const {
2657 return AppendSuffix(index, "gpr");
2658 }
2659
2660 std::string GetCustomVariable(u32 index) const {
2661 return AppendSuffix(index, "custom_var");
2662 }
2663
2664 std::string GetPredicate(Tegra::Shader::Pred pred) const {
2665 return AppendSuffix(static_cast<u32>(pred), "pred");
2666 }
2667
2668 std::string GetGenericInputAttribute(Attribute::Index attribute) const {
2669 return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
2670 }
2671
2672 std::unordered_map<u8, GenericVaryingDescription> varying_description;
2673
2674 std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
2675 const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
2676 const auto& description = varying_description.at(offset);
2677 if (description.is_scalar) {
2678 return description.name;
2679 }
2680 return fmt::format("{}[{}]", description.name, element - description.first_element);
2681 }
2682
2683 std::string GetConstBuffer(u32 index) const {
2684 return AppendSuffix(index, "cbuf");
2685 }
2686
2687 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
2688 return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
2689 }
2690
2691 std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
2692 return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
2693 suffix);
2694 }
2695
2696 std::string GetConstBufferBlock(u32 index) const {
2697 return AppendSuffix(index, "cbuf_block");
2698 }
2699
2700 std::string GetLocalMemory() const {
2701 if (suffix.empty()) {
2702 return "lmem";
2703 } else {
2704 return "lmem_" + std::string{suffix};
2705 }
2706 }
2707
2708 std::string GetInternalFlag(InternalFlag flag) const {
2709 constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
2710 "overflow_flag"};
2711 const auto index = static_cast<u32>(flag);
2712 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
2713
2714 if (suffix.empty()) {
2715 return InternalFlagNames[index];
2716 } else {
2717 return fmt::format("{}_{}", InternalFlagNames[index], suffix);
2718 }
2719 }
2720
2721 std::string GetSampler(const SamplerEntry& sampler) const {
2722 return AppendSuffix(sampler.index, "sampler");
2723 }
2724
2725 std::string GetImage(const ImageEntry& image) const {
2726 return AppendSuffix(image.index, "image");
2727 }
2728
2729 std::string AppendSuffix(u32 index, std::string_view name) const {
2730 if (suffix.empty()) {
2731 return fmt::format("{}{}", name, index);
2732 } else {
2733 return fmt::format("{}{}_{}", name, index, suffix);
2734 }
2735 }
2736
2737 u32 GetNumPhysicalInputAttributes() const {
2738 return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
2739 }
2740
2741 u32 GetNumPhysicalAttributes() const {
2742 return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
2743 }
2744
2745 u32 GetNumPhysicalVaryings() const {
2746 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
2747 }
2748
2749 const Device& device;
2750 const ShaderIR& ir;
2751 const Registry& registry;
2752 const ShaderType stage;
2753 const std::string_view identifier;
2754 const std::string_view suffix;
2755 const Header header;
2756 std::unordered_map<u8, VaryingTFB> transform_feedback;
2757
2758 ShaderWriter code;
2759
2760 std::optional<u32> max_input_vertices;
2761};
2762
2763std::string GetFlowVariable(u32 index) {
2764 return fmt::format("flow_var{}", index);
2765}
2766
2767class ExprDecompiler {
2768public:
2769 explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2770
2771 void operator()(const ExprAnd& expr) {
2772 inner += '(';
2773 std::visit(*this, *expr.operand1);
2774 inner += " && ";
2775 std::visit(*this, *expr.operand2);
2776 inner += ')';
2777 }
2778
2779 void operator()(const ExprOr& expr) {
2780 inner += '(';
2781 std::visit(*this, *expr.operand1);
2782 inner += " || ";
2783 std::visit(*this, *expr.operand2);
2784 inner += ')';
2785 }
2786
2787 void operator()(const ExprNot& expr) {
2788 inner += '!';
2789 std::visit(*this, *expr.operand1);
2790 }
2791
2792 void operator()(const ExprPredicate& expr) {
2793 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
2794 inner += decomp.GetPredicate(pred);
2795 }
2796
2797 void operator()(const ExprCondCode& expr) {
2798 inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
2799 }
2800
2801 void operator()(const ExprVar& expr) {
2802 inner += GetFlowVariable(expr.var_index);
2803 }
2804
2805 void operator()(const ExprBoolean& expr) {
2806 inner += expr.value ? "true" : "false";
2807 }
2808
2809 void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
2810 inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
2811 }
2812
2813 const std::string& GetResult() const {
2814 return inner;
2815 }
2816
2817private:
2818 GLSLDecompiler& decomp;
2819 std::string inner;
2820};
2821
2822class ASTDecompiler {
2823public:
2824 explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2825
2826 void operator()(const ASTProgram& ast) {
2827 ASTNode current = ast.nodes.GetFirst();
2828 while (current) {
2829 Visit(current);
2830 current = current->GetNext();
2831 }
2832 }
2833
2834 void operator()(const ASTIfThen& ast) {
2835 ExprDecompiler expr_parser{decomp};
2836 std::visit(expr_parser, *ast.condition);
2837 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2838 decomp.code.scope++;
2839 ASTNode current = ast.nodes.GetFirst();
2840 while (current) {
2841 Visit(current);
2842 current = current->GetNext();
2843 }
2844 decomp.code.scope--;
2845 decomp.code.AddLine("}}");
2846 }
2847
2848 void operator()(const ASTIfElse& ast) {
2849 decomp.code.AddLine("else {{");
2850 decomp.code.scope++;
2851 ASTNode current = ast.nodes.GetFirst();
2852 while (current) {
2853 Visit(current);
2854 current = current->GetNext();
2855 }
2856 decomp.code.scope--;
2857 decomp.code.AddLine("}}");
2858 }
2859
2860 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
2861 UNREACHABLE();
2862 }
2863
2864 void operator()(const ASTBlockDecoded& ast) {
2865 decomp.VisitBlock(ast.nodes);
2866 }
2867
2868 void operator()(const ASTVarSet& ast) {
2869 ExprDecompiler expr_parser{decomp};
2870 std::visit(expr_parser, *ast.condition);
2871 decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
2872 }
2873
2874 void operator()(const ASTLabel& ast) {
2875 decomp.code.AddLine("// Label_{}:", ast.index);
2876 }
2877
2878 void operator()([[maybe_unused]] const ASTGoto& ast) {
2879 UNREACHABLE();
2880 }
2881
2882 void operator()(const ASTDoWhile& ast) {
2883 ExprDecompiler expr_parser{decomp};
2884 std::visit(expr_parser, *ast.condition);
2885 decomp.code.AddLine("do {{");
2886 decomp.code.scope++;
2887 ASTNode current = ast.nodes.GetFirst();
2888 while (current) {
2889 Visit(current);
2890 current = current->GetNext();
2891 }
2892 decomp.code.scope--;
2893 decomp.code.AddLine("}} while({});", expr_parser.GetResult());
2894 }
2895
2896 void operator()(const ASTReturn& ast) {
2897 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2898 if (!is_true) {
2899 ExprDecompiler expr_parser{decomp};
2900 std::visit(expr_parser, *ast.condition);
2901 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2902 decomp.code.scope++;
2903 }
2904 if (ast.kills) {
2905 decomp.code.AddLine("discard;");
2906 } else {
2907 decomp.PreExit();
2908 decomp.code.AddLine("return;");
2909 }
2910 if (!is_true) {
2911 decomp.code.scope--;
2912 decomp.code.AddLine("}}");
2913 }
2914 }
2915
2916 void operator()(const ASTBreak& ast) {
2917 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2918 if (!is_true) {
2919 ExprDecompiler expr_parser{decomp};
2920 std::visit(expr_parser, *ast.condition);
2921 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2922 decomp.code.scope++;
2923 }
2924 decomp.code.AddLine("break;");
2925 if (!is_true) {
2926 decomp.code.scope--;
2927 decomp.code.AddLine("}}");
2928 }
2929 }
2930
2931 void Visit(const ASTNode& node) {
2932 std::visit(*this, *node->GetInnerData());
2933 }
2934
2935private:
2936 GLSLDecompiler& decomp;
2937};
2938
2939void GLSLDecompiler::DecompileAST() {
2940 const u32 num_flow_variables = ir.GetASTNumVariables();
2941 for (u32 i = 0; i < num_flow_variables; i++) {
2942 code.AddLine("bool {} = false;", GetFlowVariable(i));
2943 }
2944
2945 ASTDecompiler decompiler{*this};
2946 decompiler.Visit(ir.GetASTProgram());
2947}
2948
2949} // Anonymous namespace
2950
2951ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
2952 ShaderEntries entries;
2953 for (const auto& cbuf : ir.GetConstantBuffers()) {
2954 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
2955 cbuf.first);
2956 }
2957 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
2958 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
2959 usage.is_written);
2960 }
2961 for (const auto& sampler : ir.GetSamplers()) {
2962 entries.samplers.emplace_back(sampler);
2963 }
2964 for (const auto& image : ir.GetImages()) {
2965 entries.images.emplace_back(image);
2966 }
2967 const auto clip_distances = ir.GetClipDistances();
2968 for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
2969         entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
2970 }
2971 for (const auto& buffer : entries.const_buffers) {
2972 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
2973 }
2974 entries.shader_length = ir.GetLength();
2975 return entries;
2976}
2977
2978std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
2979 ShaderType stage, std::string_view identifier,
2980 std::string_view suffix) {
2981 GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
2982 decompiler.Decompile();
2983 return decompiler.GetResult();
2984}
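// Usage sketch (caller side, names assumed rather than quoted from the shader cache): the two
// entry points above are typically paired as
//     const ShaderEntries entries = MakeEntries(device, ir, ShaderType::Vertex);
//     const std::string glsl = DecompileShader(device, ir, registry, ShaderType::Vertex, "vertex");
// after which `glsl` is compiled into a GL program and resources are bound from `entries`.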
2985
2986} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
deleted file mode 100644
index 0397a000c..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ /dev/null
@@ -1,69 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <vector>
12#include "common/common_types.h"
13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/engines/shader_type.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace OpenGL {
19
20class Device;
21
22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using ImageEntry = VideoCommon::Shader::ImageEntry;
25
26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
27public:
28 explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
29 : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
30
31 u32 GetIndex() const {
32 return index;
33 }
34
35private:
36 u32 index = 0;
37};
38
39struct GlobalMemoryEntry {
40 constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
41 bool is_written_)
42 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
43 is_written_} {}
44
45 u32 cbuf_index = 0;
46 u32 cbuf_offset = 0;
47 bool is_read = false;
48 bool is_written = false;
49};
50
51struct ShaderEntries {
52 std::vector<ConstBufferEntry> const_buffers;
53 std::vector<GlobalMemoryEntry> global_memory_entries;
54 std::vector<SamplerEntry> samplers;
55 std::vector<ImageEntry> images;
56 std::size_t shader_length{};
57 u32 clip_distances{};
58 u32 enabled_uniform_buffers{};
59};
60
61ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
62 Tegra::Engines::ShaderType stage);
63
64std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
65 const VideoCommon::Shader::Registry& registry,
66 Tegra::Engines::ShaderType stage, std::string_view identifier,
67 std::string_view suffix = {});
68
69} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
deleted file mode 100644
index 0deb86517..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "common/fs/file.h"
12#include "common/fs/fs.h"
13#include "common/fs/path_util.h"
14#include "common/logging/log.h"
15#include "common/scm_rev.h"
16#include "common/settings.h"
17#include "common/zstd_compression.h"
18#include "core/core.h"
19#include "core/hle/kernel/k_process.h"
20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_shader_cache.h"
22#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
23
24namespace OpenGL {
25
26using Tegra::Engines::ShaderType;
27using VideoCommon::Shader::BindlessSamplerMap;
28using VideoCommon::Shader::BoundSamplerMap;
29using VideoCommon::Shader::KeyMap;
30using VideoCommon::Shader::SeparateSamplerKey;
31using ShaderCacheVersionHash = std::array<u8, 64>;
32
33struct ConstBufferKey {
34 u32 cbuf = 0;
35 u32 offset = 0;
36 u32 value = 0;
37};
38
39struct BoundSamplerEntry {
40 u32 offset = 0;
41 Tegra::Engines::SamplerDescriptor sampler;
42};
43
44struct SeparateSamplerEntry {
45 u32 cbuf1 = 0;
46 u32 cbuf2 = 0;
47 u32 offset1 = 0;
48 u32 offset2 = 0;
49 Tegra::Engines::SamplerDescriptor sampler;
50};
51
52struct BindlessSamplerEntry {
53 u32 cbuf = 0;
54 u32 offset = 0;
55 Tegra::Engines::SamplerDescriptor sampler;
56};
57
58namespace {
59
60constexpr u32 NativeVersion = 21;
61
62ShaderCacheVersionHash GetShaderCacheVersionHash() {
63 ShaderCacheVersionHash hash{};
64 const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
65 std::memcpy(hash.data(), Common::g_shader_cache_version, length);
66 return hash;
67}
68
69} // Anonymous namespace
70
71ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
72
73ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
74
75bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
76 if (!file.ReadObject(type)) {
77 return false;
78 }
79 u32 code_size;
80 u32 code_size_b;
81 if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
82 return false;
83 }
84 code.resize(code_size);
85 code_b.resize(code_size_b);
86 if (file.Read(code) != code_size) {
87 return false;
88 }
89 if (HasProgramA() && file.Read(code_b) != code_size_b) {
90 return false;
91 }
92
93 u8 is_texture_handler_size_known;
94 u32 texture_handler_size_value;
95 u32 num_keys;
96 u32 num_bound_samplers;
97 u32 num_separate_samplers;
98 u32 num_bindless_samplers;
99 if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
100 !file.ReadObject(is_texture_handler_size_known) ||
101 !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
102 !file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
103 !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
104 !file.ReadObject(num_bindless_samplers)) {
105 return false;
106 }
107 if (is_texture_handler_size_known) {
108 texture_handler_size = texture_handler_size_value;
109 }
110
111 std::vector<ConstBufferKey> flat_keys(num_keys);
112 std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
113 std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
114 std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
115 if (file.Read(flat_keys) != flat_keys.size() ||
116 file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
117 file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
118 file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
119 return false;
120 }
121 for (const auto& entry : flat_keys) {
122 keys.insert({{entry.cbuf, entry.offset}, entry.value});
123 }
124 for (const auto& entry : flat_bound_samplers) {
125 bound_samplers.emplace(entry.offset, entry.sampler);
126 }
127 for (const auto& entry : flat_separate_samplers) {
128 SeparateSamplerKey key;
129 key.buffers = {entry.cbuf1, entry.cbuf2};
130 key.offsets = {entry.offset1, entry.offset2};
131 separate_samplers.emplace(key, entry.sampler);
132 }
133 for (const auto& entry : flat_bindless_samplers) {
134 bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
135 }
136
137 return true;
138}
139
140bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
141 if (!file.WriteObject(static_cast<u32>(type)) ||
142 !file.WriteObject(static_cast<u32>(code.size())) ||
143 !file.WriteObject(static_cast<u32>(code_b.size()))) {
144 return false;
145 }
146 if (file.Write(code) != code.size()) {
147 return false;
148 }
149 if (HasProgramA() && file.Write(code_b) != code_b.size()) {
150 return false;
151 }
152
153 if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
154 !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
155 !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
156 !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
157 !file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
158 !file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
159 !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
160 return false;
161 }
162
163 std::vector<ConstBufferKey> flat_keys;
164 flat_keys.reserve(keys.size());
165 for (const auto& [address, value] : keys) {
166 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
167 }
168
169 std::vector<BoundSamplerEntry> flat_bound_samplers;
170 flat_bound_samplers.reserve(bound_samplers.size());
171 for (const auto& [address, sampler] : bound_samplers) {
172 flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
173 }
174
175 std::vector<SeparateSamplerEntry> flat_separate_samplers;
176 flat_separate_samplers.reserve(separate_samplers.size());
177 for (const auto& [key, sampler] : separate_samplers) {
178 SeparateSamplerEntry entry;
179 std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
180 std::tie(entry.offset1, entry.offset2) = key.offsets;
181 entry.sampler = sampler;
182 flat_separate_samplers.push_back(entry);
183 }
184
185 std::vector<BindlessSamplerEntry> flat_bindless_samplers;
186 flat_bindless_samplers.reserve(bindless_samplers.size());
187 for (const auto& [address, sampler] : bindless_samplers) {
188 flat_bindless_samplers.push_back(
189 BindlessSamplerEntry{address.first, address.second, sampler});
190 }
191
192 return file.Write(flat_keys) == flat_keys.size() &&
193 file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
194 file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
195 file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
196}
197
198ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
199
200ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
201
202void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
203 title_id = title_id_;
204}
205
206std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
207 // Skip games without title id
208 const bool has_title_id = title_id != 0;
209 if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
210 return std::nullopt;
211 }
212
213 Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
214 Common::FS::FileType::BinaryFile};
215 if (!file.IsOpen()) {
216 LOG_INFO(Render_OpenGL, "No transferable shader cache found");
217 is_usable = true;
218 return std::nullopt;
219 }
220
221 u32 version{};
222 if (!file.ReadObject(version)) {
223 LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
224 return std::nullopt;
225 }
226
227 if (version < NativeVersion) {
228 LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
229 file.Close();
230 InvalidateTransferable();
231 is_usable = true;
232 return std::nullopt;
233 }
234 if (version > NativeVersion) {
235 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
236 "of the emulator, skipping");
237 return std::nullopt;
238 }
239
240 // Version is valid, load the shaders
241 std::vector<ShaderDiskCacheEntry> entries;
242 while (static_cast<u64>(file.Tell()) < file.GetSize()) {
243 ShaderDiskCacheEntry& entry = entries.emplace_back();
244 if (!entry.Load(file)) {
245 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
246 return std::nullopt;
247 }
248 }
249
250 is_usable = true;
251 return {std::move(entries)};
252}
253
254std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
255 if (!is_usable) {
256 return {};
257 }
258
259 Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
260 Common::FS::FileType::BinaryFile};
261 if (!file.IsOpen()) {
262 LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
263 return {};
264 }
265
266 if (const auto result = LoadPrecompiledFile(file)) {
267 return *result;
268 }
269
270 LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
271 file.Close();
272 InvalidatePrecompiled();
273 return {};
274}
275
276std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
277 Common::FS::IOFile& file) {
278 // Read compressed file from disk and decompress to virtual precompiled cache file
279 std::vector<u8> compressed(file.GetSize());
280 if (file.Read(compressed) != file.GetSize()) {
281 return std::nullopt;
282 }
283 const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
284 SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
285 precompiled_cache_virtual_file_offset = 0;
286
287 ShaderCacheVersionHash file_hash{};
288 if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
289 precompiled_cache_virtual_file_offset = 0;
290 return std::nullopt;
291 }
292 if (GetShaderCacheVersionHash() != file_hash) {
293 LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
294 precompiled_cache_virtual_file_offset = 0;
295 return std::nullopt;
296 }
297
298 std::vector<ShaderDiskCachePrecompiled> entries;
299 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
300 u32 binary_size;
301 auto& entry = entries.emplace_back();
302 if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
303 !LoadObjectFromPrecompiled(entry.binary_format) ||
304 !LoadObjectFromPrecompiled(binary_size)) {
305 return std::nullopt;
306 }
307
308 entry.binary.resize(binary_size);
309 if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
310 return std::nullopt;
311 }
312 }
313 return entries;
314}
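// Layout sketch of the decompressed precompiled blob, as read above (illustrative only):
//     ShaderCacheVersionHash hash;   // 64-byte emulator version hash
//     repeated records of:
//         u64 unique_identifier; GLenum binary_format; u32 binary_size;
//         u8 binary[binary_size];    // driver program binary from glGetProgramBinary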
315
316void ShaderDiskCacheOpenGL::InvalidateTransferable() {
317 if (!Common::FS::RemoveFile(GetTransferablePath())) {
318 LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
319 Common::FS::PathToUTF8String(GetTransferablePath()));
320 }
321 InvalidatePrecompiled();
322}
323
324void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
325 // Clear virtual precompiled cache file
326 precompiled_cache_virtual_file.Resize(0);
327
328 if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
329 LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
330 Common::FS::PathToUTF8String(GetPrecompiledPath()));
331 }
332}
333
334void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
335 if (!is_usable) {
336 return;
337 }
338
339 const u64 id = entry.unique_identifier;
340 if (stored_transferable.contains(id)) {
341 // The shader already exists
342 return;
343 }
344
345 Common::FS::IOFile file = AppendTransferableFile();
346 if (!file.IsOpen()) {
347 return;
348 }
349 if (!entry.Save(file)) {
350 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
351 file.Close();
352 InvalidateTransferable();
353 return;
354 }
355
356 stored_transferable.insert(id);
357}
358
359void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
360 if (!is_usable) {
361 return;
362 }
363
364 // TODO(Rodrigo): This is a design smell. I shouldn't have to manually write the header
365 // when writing the dump. This should be done the moment I get access to write to the virtual
366 // file.
367 if (precompiled_cache_virtual_file.GetSize() == 0) {
368 SavePrecompiledHeaderToVirtualPrecompiledCache();
369 }
370
371 GLint binary_length;
372 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
373
374 GLenum binary_format;
375 std::vector<u8> binary(binary_length);
376 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
377
378 if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
379 !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
380 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
381 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
382 unique_identifier);
383 InvalidatePrecompiled();
384 }
385}
386
387Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
388 if (!EnsureDirectories()) {
389 return {};
390 }
391
392 const auto transferable_path{GetTransferablePath()};
393 const bool existed = Common::FS::Exists(transferable_path);
394
395 Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
396 Common::FS::FileType::BinaryFile};
397 if (!file.IsOpen()) {
398 LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
399 Common::FS::PathToUTF8String(transferable_path));
400 return {};
401 }
402 if (!existed || file.GetSize() == 0) {
403 // If the file didn't exist, write its version
404 if (!file.WriteObject(NativeVersion)) {
405 LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
406 Common::FS::PathToUTF8String(transferable_path));
407 return {};
408 }
409 }
410 return file;
411}
412
413void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
414 const auto hash{GetShaderCacheVersionHash()};
415 if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
416 LOG_ERROR(
417 Render_OpenGL,
418 "Failed to write precompiled cache version hash to virtual precompiled cache file");
419 }
420}
421
422void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
423 precompiled_cache_virtual_file_offset = 0;
424 const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
425 const std::vector<u8> compressed =
426 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
427
428 const auto precompiled_path = GetPrecompiledPath();
429 Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
430 Common::FS::FileType::BinaryFile};
431
432 if (!file.IsOpen()) {
433 LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
434 Common::FS::PathToUTF8String(precompiled_path));
435 return;
436 }
437 if (file.Write(compressed) != compressed.size()) {
438 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
439 Common::FS::PathToUTF8String(precompiled_path));
440 }
441}
442
443bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
444 const auto CreateDir = [](const std::filesystem::path& dir) {
445 if (!Common::FS::CreateDir(dir)) {
446 LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
447 Common::FS::PathToUTF8String(dir));
448 return false;
449 }
450 return true;
451 };
452
453 return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
454 CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
455 CreateDir(GetPrecompiledDir());
456}
457
458std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
459 return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
460}
461
462std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
463 return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
464}
465
466std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
467 return GetBaseDir() / "transferable";
468}
469
470std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
471 return GetBaseDir() / "precompiled";
472}
473
474std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
475 return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
476}
477
478std::string ShaderDiskCacheOpenGL::GetTitleID() const {
479 return fmt::format("{:016X}", title_id);
480}
481
482} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
deleted file mode 100644
index f8bc23868..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ /dev/null
@@ -1,176 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <filesystem>
8#include <optional>
9#include <string>
10#include <tuple>
11#include <type_traits>
12#include <unordered_map>
13#include <unordered_set>
14#include <utility>
15#include <vector>
16
17#include <glad/glad.h>
18
19#include "common/assert.h"
20#include "common/common_types.h"
21#include "core/file_sys/vfs_vector.h"
22#include "video_core/engines/shader_type.h"
23#include "video_core/shader/registry.h"
24
25namespace Common::FS {
26class IOFile;
27}
28
29namespace OpenGL {
30
31using ProgramCode = std::vector<u64>;
32
33/// Describes a shader and how it's used by the guest GPU
34struct ShaderDiskCacheEntry {
35 ShaderDiskCacheEntry();
36 ~ShaderDiskCacheEntry();
37
38 bool Load(Common::FS::IOFile& file);
39
40 bool Save(Common::FS::IOFile& file) const;
41
42 bool HasProgramA() const {
43 return !code.empty() && !code_b.empty();
44 }
45
46 Tegra::Engines::ShaderType type{};
47 ProgramCode code;
48 ProgramCode code_b;
49
50 u64 unique_identifier = 0;
51 std::optional<u32> texture_handler_size;
52 u32 bound_buffer = 0;
53 VideoCommon::Shader::GraphicsInfo graphics_info;
54 VideoCommon::Shader::ComputeInfo compute_info;
55 VideoCommon::Shader::KeyMap keys;
56 VideoCommon::Shader::BoundSamplerMap bound_samplers;
57 VideoCommon::Shader::SeparateSamplerMap separate_samplers;
58 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
59};
60
61/// Contains a dumped OpenGL binary program
62struct ShaderDiskCachePrecompiled {
63 u64 unique_identifier = 0;
64 GLenum binary_format = 0;
65 std::vector<u8> binary;
66};
67
68class ShaderDiskCacheOpenGL {
69public:
70 explicit ShaderDiskCacheOpenGL();
71 ~ShaderDiskCacheOpenGL();
72
73 /// Binds a title ID for all future operations.
74 void BindTitleID(u64 title_id);
75
76 /// Loads the transferable cache. If the file has an old version or loading fails, it deletes the file.
77 std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
78
79 /// Loads current game's precompiled cache. Invalidates on failure.
80 std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
81
82 /// Removes the transferable (and precompiled) cache file.
83 void InvalidateTransferable();
84
85 /// Removes the precompiled cache file and clears virtual precompiled cache file.
86 void InvalidatePrecompiled();
87
88 /// Saves a raw dump to the transferable file. Checks for collisions.
89 void SaveEntry(const ShaderDiskCacheEntry& entry);
90
91 /// Saves a dump entry to the precompiled file. Does not check for collisions.
92 void SavePrecompiled(u64 unique_identifier, GLuint program);
93
94 /// Serializes virtual precompiled shader cache file to real file
95 void SaveVirtualPrecompiledFile();
96
97private:
98 /// Loads the precompiled cache file. Returns std::nullopt on failure.
99 std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
100 Common::FS::IOFile& file);
101
102 /// Opens the current game's transferable file and writes its header if it doesn't exist
103 Common::FS::IOFile AppendTransferableFile() const;
104
105 /// Saves the precompiled header to the virtual precompiled cache file
106 void SavePrecompiledHeaderToVirtualPrecompiledCache();
107
108 /// Create shader disk cache directories. Returns true on success.
109 bool EnsureDirectories() const;
110
111 /// Gets current game's transferable file path
112 std::filesystem::path GetTransferablePath() const;
113
114 /// Gets current game's precompiled file path
115 std::filesystem::path GetPrecompiledPath() const;
116
117 /// Get user's transferable directory path
118 std::filesystem::path GetTransferableDir() const;
119
120 /// Get user's precompiled directory path
121 std::filesystem::path GetPrecompiledDir() const;
122
123 /// Get user's shader directory path
124 std::filesystem::path GetBaseDir() const;
125
126 /// Get current game's title id
127 std::string GetTitleID() const;
128
129 template <typename T>
130 bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
131 const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
132 data, length, precompiled_cache_virtual_file_offset);
133 precompiled_cache_virtual_file_offset += write_length;
134 return write_length == sizeof(T) * length;
135 }
136
137 template <typename T>
138 bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
139 const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
140 data, length, precompiled_cache_virtual_file_offset);
141 precompiled_cache_virtual_file_offset += read_length;
142 return read_length == sizeof(T) * length;
143 }
144
145 template <typename T>
146 bool SaveObjectToPrecompiled(const T& object) {
147 return SaveArrayToPrecompiled(&object, 1);
148 }
149
150 bool SaveObjectToPrecompiled(bool object) {
151 const auto value = static_cast<u8>(object);
152 return SaveArrayToPrecompiled(&value, 1);
153 }
154
155 template <typename T>
156 bool LoadObjectFromPrecompiled(T& object) {
157 return LoadArrayFromPrecompiled(&object, 1);
158 }
159
160 // Stores the whole precompiled cache which will be read from or saved to the precompiled cache
161 // file
162 FileSys::VectorVfsFile precompiled_cache_virtual_file;
163 // Stores the current offset of the precompiled cache file for IO purposes
164 std::size_t precompiled_cache_virtual_file_offset = 0;
165
166 // Stored transferable shaders
167 std::unordered_set<u64> stored_transferable;
168
169 /// Title ID to operate on
170 u64 title_id = 0;
171
172 // Whether the cache was successfully loaded at boot and is usable
173 bool is_usable = false;
174};
175
176} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 553e6e8d6..399959afb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,149 +1,3 @@
1// Copyright 2018 yuzu Emulator Project 1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4
5#include <glad/glad.h>
6
7#include "common/common_types.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_opengl/gl_device.h"
10#include "video_core/renderer_opengl/gl_shader_manager.h"
11
12namespace OpenGL {
13
14namespace {
15
16void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
17 if (current == old) {
18 return;
19 }
20 if (current == 0) {
21 if (enabled) {
22 enabled = false;
23 glDisable(stage);
24 }
25 return;
26 }
27 if (!enabled) {
28 enabled = true;
29 glEnable(stage);
30 }
31 glBindProgramARB(stage, current);
32}
33
34} // Anonymous namespace
35
36ProgramManager::ProgramManager(const Device& device)
37 : use_assembly_programs{device.UseAssemblyShaders()} {
38 if (use_assembly_programs) {
39 glEnable(GL_COMPUTE_PROGRAM_NV);
40 } else {
41 graphics_pipeline.Create();
42 glBindProgramPipeline(graphics_pipeline.handle);
43 }
44}
45
46ProgramManager::~ProgramManager() = default;
47
48void ProgramManager::BindCompute(GLuint program) {
49 if (use_assembly_programs) {
50 glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
51 } else {
52 is_graphics_bound = false;
53 glUseProgram(program);
54 }
55}
56
57void ProgramManager::BindGraphicsPipeline() {
58 if (!use_assembly_programs) {
59 UpdateSourcePrograms();
60 }
61}
62
63void ProgramManager::BindHostPipeline(GLuint pipeline) {
64 if (use_assembly_programs) {
65 if (geometry_enabled) {
66 geometry_enabled = false;
67 old_state.geometry = 0;
68 glDisable(GL_GEOMETRY_PROGRAM_NV);
69 }
70 } else {
71 if (!is_graphics_bound) {
72 glUseProgram(0);
73 }
74 }
75 glBindProgramPipeline(pipeline);
76}
77
78void ProgramManager::RestoreGuestPipeline() {
79 if (use_assembly_programs) {
80 glBindProgramPipeline(0);
81 } else {
82 glBindProgramPipeline(graphics_pipeline.handle);
83 }
84}
85
86void ProgramManager::BindHostCompute(GLuint program) {
87 if (use_assembly_programs) {
88 glDisable(GL_COMPUTE_PROGRAM_NV);
89 }
90 glUseProgram(program);
91 is_graphics_bound = false;
92}
93
94void ProgramManager::RestoreGuestCompute() {
95 if (use_assembly_programs) {
96 glEnable(GL_COMPUTE_PROGRAM_NV);
97 glUseProgram(0);
98 }
99}
100
101void ProgramManager::UseVertexShader(GLuint program) {
102 if (use_assembly_programs) {
103 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
104 }
105 current_state.vertex = program;
106}
107
108void ProgramManager::UseGeometryShader(GLuint program) {
109 if (use_assembly_programs) {
110 BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
111 }
112 current_state.geometry = program;
113}
114
115void ProgramManager::UseFragmentShader(GLuint program) {
116 if (use_assembly_programs) {
117 BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
118 }
119 current_state.fragment = program;
120}
121
122void ProgramManager::UpdateSourcePrograms() {
123 if (!is_graphics_bound) {
124 is_graphics_bound = true;
125 glUseProgram(0);
126 }
127
128 const GLuint handle = graphics_pipeline.handle;
129 const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
130 if (current == old) {
131 return;
132 }
133 glUseProgramStages(handle, stage, current);
134 };
135 update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
136 update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
137 update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
138
139 old_state = current_state;
140}
141
142void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
143 const auto& regs = maxwell.regs;
144
145 // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
146 y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
147}
148
149} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index ad42cce74..d7ef0775d 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,79 +4,142 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef> 7#include <array>
8#include <span>
8 9
9#include <glad/glad.h> 10#include <glad/glad.h>
10 11
12#include "video_core/renderer_opengl/gl_device.h"
11#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
12#include "video_core/renderer_opengl/maxwell_to_gl.h"
13 14
14namespace OpenGL { 15namespace OpenGL {
15 16
16class Device;
17
18/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
19/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
20/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
21/// Not following that rule will cause problems on some AMD drivers.
22struct alignas(16) MaxwellUniformData {
23 void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
24
25 GLfloat y_direction;
26};
27static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
28static_assert(sizeof(MaxwellUniformData) < 16384,
29 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
30
31class ProgramManager { 17class ProgramManager {
32public: 18 static constexpr size_t NUM_STAGES = 5;
33 explicit ProgramManager(const Device& device);
34 ~ProgramManager();
35
36 /// Binds a compute program
37 void BindCompute(GLuint program);
38
39 /// Updates bound programs.
40 void BindGraphicsPipeline();
41
42 /// Binds an OpenGL pipeline object unsynchronized with the guest state.
43 void BindHostPipeline(GLuint pipeline);
44
45 /// Rewinds BindHostPipeline state changes.
46 void RestoreGuestPipeline();
47
48 /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
49 void BindHostCompute(GLuint program);
50 19
51 /// Rewinds BindHostCompute state changes. 20 static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
52 void RestoreGuestCompute(); 21 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
53 22 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
54 void UseVertexShader(GLuint program);
55 void UseGeometryShader(GLuint program);
56 void UseFragmentShader(GLuint program);
57
58private:
59 struct PipelineState {
60 GLuint vertex = 0;
61 GLuint geometry = 0;
62 GLuint fragment = 0;
63 }; 23 };
64 24
65 /// Update GLSL programs. 25public:
66 void UpdateSourcePrograms(); 26 explicit ProgramManager(const Device& device) {
67 27 glCreateProgramPipelines(1, &pipeline.handle);
68 OGLPipeline graphics_pipeline; 28 if (device.UseAssemblyShaders()) {
69 29 glEnable(GL_COMPUTE_PROGRAM_NV);
70 PipelineState current_state; 30 }
71 PipelineState old_state; 31 }
72 32
73 bool use_assembly_programs = false; 33 void BindComputeProgram(GLuint program) {
74 34 glUseProgram(program);
75 bool is_graphics_bound = true; 35 is_compute_bound = true;
36 }
37
38 void BindComputeAssemblyProgram(GLuint program) {
39 if (current_assembly_compute_program != program) {
40 current_assembly_compute_program = program;
41 glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
42 }
43 UnbindPipeline();
44 }
45
46 void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
47 static constexpr std::array<GLenum, 5> stage_enums{
48 GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
49 GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
50 };
51 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
52 if (current_programs[stage] != programs[stage].handle) {
53 current_programs[stage] = programs[stage].handle;
54 glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
55 }
56 }
57 BindPipeline();
58 }
59
60 void BindPresentPrograms(GLuint vertex, GLuint fragment) {
61 if (current_programs[0] != vertex) {
62 current_programs[0] = vertex;
63 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
64 }
65 if (current_programs[4] != fragment) {
66 current_programs[4] = fragment;
67 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
68 }
69 glUseProgramStages(
70 pipeline.handle,
71 GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
72 current_programs[1] = 0;
73 current_programs[2] = 0;
74 current_programs[3] = 0;
75
76 if (current_stage_mask != 0) {
77 current_stage_mask = 0;
78 for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
79 glDisable(program_type);
80 }
81 }
82 BindPipeline();
83 }
84
85 void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
86 u32 stage_mask) {
87 const u32 changed_mask = current_stage_mask ^ stage_mask;
88 current_stage_mask = stage_mask;
89
90 if (changed_mask != 0) {
91 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
92 if (((changed_mask >> stage) & 1) != 0) {
93 if (((stage_mask >> stage) & 1) != 0) {
94 glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
95 } else {
96 glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
97 }
98 }
99 }
100 }
101 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
102 if (current_programs[stage] != programs[stage].handle) {
103 current_programs[stage] = programs[stage].handle;
104 glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
105 }
106 }
107 UnbindPipeline();
108 }
109
110 void RestoreGuestCompute() {}
76 111
77 bool vertex_enabled = false; 112private:
78 bool geometry_enabled = false; 113 void BindPipeline() {
79 bool fragment_enabled = false; 114 if (!is_pipeline_bound) {
115 is_pipeline_bound = true;
116 glBindProgramPipeline(pipeline.handle);
117 }
118 UnbindCompute();
119 }
120
121 void UnbindPipeline() {
122 if (is_pipeline_bound) {
123 is_pipeline_bound = false;
124 glBindProgramPipeline(0);
125 }
126 UnbindCompute();
127 }
128
129 void UnbindCompute() {
130 if (is_compute_bound) {
131 is_compute_bound = false;
132 glUseProgram(0);
133 }
134 }
135
136 OGLPipeline pipeline;
137 bool is_pipeline_bound{};
138 bool is_compute_bound{};
139
140 u32 current_stage_mask = 0;
141 std::array<GLuint, NUM_STAGES> current_programs{};
142 GLuint current_assembly_compute_program = 0;
80}; 143};
81 144
82} // namespace OpenGL 145} // namespace OpenGL
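BindAssemblyPrograms above computes changed_mask = current_stage_mask ^ stage_mask and only touches glEnable/glDisable for stages whose bit actually flipped. A minimal sketch of that XOR-diff pattern, assuming an active GL context with NV assembly program support and the same five-stage enum table; ToggleChangedStages is a hypothetical free function, not yuzu API:

#include <array>
#include <cstddef>
#include <cstdint>
#include <glad/glad.h>

// The five assembly program targets, in pipeline order (matches ASSEMBLY_PROGRAM_ENUMS).
constexpr std::array<GLenum, 5> kStageEnums{
    GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
    GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};

// Enable or disable only the stages whose enabled bit differs from the previous mask.
inline void ToggleChangedStages(std::uint32_t old_mask, std::uint32_t new_mask) {
    const std::uint32_t changed_mask = old_mask ^ new_mask;
    for (std::size_t stage = 0; stage < kStageEnums.size(); ++stage) {
        if (((changed_mask >> stage) & 1) == 0) {
            continue; // this stage kept its previous enabled state
        }
        if (((new_mask >> stage) & 1) != 0) {
            glEnable(kStageEnums[stage]);
        } else {
            glDisable(kStageEnums[stage]);
        }
    }
}

Skipping unchanged stages keeps the per-bind cost proportional to the number of stages that actually toggled, which is what makes it reasonable for the new header to keep this logic inline.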
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 4bf0d6090..d432072ad 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -5,57 +5,108 @@
5#include <string_view> 5#include <string_view>
6#include <vector> 6#include <vector>
7#include <glad/glad.h> 7#include <glad/glad.h>
8
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "common/settings.h"
10#include "video_core/renderer_opengl/gl_shader_util.h" 12#include "video_core/renderer_opengl/gl_shader_util.h"
11 13
12namespace OpenGL::GLShader { 14namespace OpenGL {
13 15
14namespace { 16static OGLProgram LinkSeparableProgram(GLuint shader) {
17 OGLProgram program;
18 program.handle = glCreateProgram();
19 glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
20 glAttachShader(program.handle, shader);
21 glLinkProgram(program.handle);
22 if (!Settings::values.renderer_debug) {
23 return program;
24 }
25 GLint link_status{};
26 glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
15 27
16std::string_view StageDebugName(GLenum type) { 28 GLint log_length{};
17 switch (type) { 29 glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
18 case GL_VERTEX_SHADER: 30 if (log_length == 0) {
19 return "vertex"; 31 return program;
20 case GL_GEOMETRY_SHADER: 32 }
21 return "geometry"; 33 std::string log(log_length, 0);
22 case GL_FRAGMENT_SHADER: 34 glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
23 return "fragment"; 35 if (link_status == GL_FALSE) {
24 case GL_COMPUTE_SHADER: 36 LOG_ERROR(Render_OpenGL, "{}", log);
25 return "compute"; 37 } else {
38 LOG_WARNING(Render_OpenGL, "{}", log);
26 } 39 }
27 UNIMPLEMENTED(); 40 return program;
28 return "unknown";
29} 41}
30 42
31} // Anonymous namespace 43static void LogShader(GLuint shader, std::string_view code = {}) {
44 GLint shader_status{};
45 glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
46 if (shader_status == GL_FALSE) {
47 LOG_ERROR(Render_OpenGL, "Failed to build shader");
48 }
49 GLint log_length{};
50 glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
51 if (log_length == 0) {
52 return;
53 }
54 std::string log(log_length, 0);
55 glGetShaderInfoLog(shader, log_length, nullptr, log.data());
56 if (shader_status == GL_FALSE) {
57 LOG_ERROR(Render_OpenGL, "{}", log);
58 if (!code.empty()) {
59 LOG_INFO(Render_OpenGL, "\n{}", code);
60 }
61 } else {
62 LOG_WARNING(Render_OpenGL, "{}", log);
63 }
64}
32 65
33GLuint LoadShader(std::string_view source, GLenum type) { 66OGLProgram CreateProgram(std::string_view code, GLenum stage) {
34 const std::string_view debug_type = StageDebugName(type); 67 OGLShader shader;
35 const GLuint shader_id = glCreateShader(type); 68 shader.handle = glCreateShader(stage);
36 69
37 const GLchar* source_string = source.data(); 70 const GLint length = static_cast<GLint>(code.size());
38 const GLint source_length = static_cast<GLint>(source.size()); 71 const GLchar* const code_ptr = code.data();
72 glShaderSource(shader.handle, 1, &code_ptr, &length);
73 glCompileShader(shader.handle);
74 if (Settings::values.renderer_debug) {
75 LogShader(shader.handle, code);
76 }
77 return LinkSeparableProgram(shader.handle);
78}
39 79
40 glShaderSource(shader_id, 1, &source_string, &source_length); 80OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
41 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); 81 OGLShader shader;
42 glCompileShader(shader_id); 82 shader.handle = glCreateShader(stage);
43 83
44 GLint result = GL_FALSE; 84 glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
45 GLint info_log_length; 85 static_cast<GLsizei>(code.size_bytes()));
46 glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); 86 glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
47 glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); 87 if (Settings::values.renderer_debug) {
88 LogShader(shader.handle);
89 }
90 return LinkSeparableProgram(shader.handle);
91}
48 92
49 if (info_log_length > 1) { 93OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
50 std::string shader_error(info_log_length, ' '); 94 OGLAssemblyProgram program;
51 glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); 95 glGenProgramsARB(1, &program.handle);
52 if (result == GL_TRUE) { 96 glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
53 LOG_DEBUG(Render_OpenGL, "{}", shader_error); 97 static_cast<GLsizei>(code.size()), code.data());
54 } else { 98 if (Settings::values.renderer_debug) {
55 LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); 99 const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
100 if (err && *err) {
101 if (std::strstr(err, "error")) {
102 LOG_CRITICAL(Render_OpenGL, "\n{}", err);
103 LOG_INFO(Render_OpenGL, "\n{}", code);
104 } else {
105 LOG_WARNING(Render_OpenGL, "\n{}", err);
106 }
56 } 107 }
57 } 108 }
58 return shader_id; 109 return program;
59} 110}
60 111
61} // namespace OpenGL::GLShader 112} // namespace OpenGL
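The new std::span<const u32> overload of CreateProgram feeds a SPIR-V module through glShaderBinary and glSpecializeShader instead of compiling GLSL text. Below is a rough sketch of that path, assuming a GL 4.6 context (or ARB_gl_spirv) and a module whose entry point is "main"; MakeSpirvShader is a hypothetical helper name, not the yuzu function:

#include <cstdint>
#include <span>
#include <glad/glad.h>

// Creates and specializes a shader object from a SPIR-V module.
// Returns 0 (and deletes the shader) if the driver rejects the binary.
inline GLuint MakeSpirvShader(GLenum stage, std::span<const std::uint32_t> spirv) {
    const GLuint shader = glCreateShader(stage);
    glShaderBinary(1, &shader, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, spirv.data(),
                   static_cast<GLsizei>(spirv.size_bytes()));
    // No specialization constants are overridden here; "main" must exist in the module.
    glSpecializeShader(shader, "main", 0, nullptr, nullptr);
    GLint status{};
    glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
    if (status != GL_TRUE) {
        glDeleteShader(shader);
        return 0;
    }
    return shader;
}

The driver treats specialization as the compile step, so GL_COMPILE_STATUS only becomes meaningful after glSpecializeShader; LinkSeparableProgram above then attaches the result to a separable program exactly as in the GLSL path.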
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 1b770532e..4e1a2a8e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -4,92 +4,23 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <span>
7#include <string> 8#include <string>
9#include <string_view>
8#include <vector> 10#include <vector>
11
9#include <glad/glad.h> 12#include <glad/glad.h>
13
10#include "common/assert.h" 14#include "common/assert.h"
11#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
12 17
13namespace OpenGL::GLShader { 18namespace OpenGL {
14
15/**
16 * Utility function to log the source code of a list of shaders.
17 * @param shaders The OpenGL shaders whose source we will print.
18 */
19template <typename... T>
20void LogShaderSource(T... shaders) {
21 auto shader_list = {shaders...};
22
23 for (const auto& shader : shader_list) {
24 if (shader == 0)
25 continue;
26
27 GLint source_length;
28 glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
29
30 std::string source(source_length, ' ');
31 glGetShaderSource(shader, source_length, nullptr, &source[0]);
32 LOG_INFO(Render_OpenGL, "Shader source {}", source);
33 }
34}
35
36/**
37 * Utility function to create and compile an OpenGL GLSL shader
38 * @param source String of the GLSL shader program
39 * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
40 */
41GLuint LoadShader(std::string_view source, GLenum type);
42
43/**
44 * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
45 * @param separable_program whether to create a separable program
46 * @param shaders ID of shaders to attach to the program
47 * @returns Handle of the newly created OpenGL program object
48 */
49template <typename... T>
50GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
51 // Link the program
52 LOG_DEBUG(Render_OpenGL, "Linking program...");
53
54 GLuint program_id = glCreateProgram();
55
56 ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
57
58 if (separable_program) {
59 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
60 }
61 if (hint_retrievable) {
62 glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
63 }
64
65 glLinkProgram(program_id);
66
67 // Check the program
68 GLint result = GL_FALSE;
69 GLint info_log_length;
70 glGetProgramiv(program_id, GL_LINK_STATUS, &result);
71 glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
72
73 if (info_log_length > 1) {
74 std::string program_error(info_log_length, ' ');
75 glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
76 if (result == GL_TRUE) {
77 LOG_DEBUG(Render_OpenGL, "{}", program_error);
78 } else {
79 LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
80 }
81 }
82
83 if (result == GL_FALSE) {
84 // There was a problem linking the shader, print the source for debugging purposes.
85 LogShaderSource(shaders...);
86 }
87 19
88 ASSERT_MSG(result == GL_TRUE, "Shader not linked"); 20OGLProgram CreateProgram(std::string_view code, GLenum stage);
89 21
90 ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); 22OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
91 23
92 return program_id; 24OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);
93}
94 25
95} // namespace OpenGL::GLShader 26} // namespace OpenGL
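gl_shader_util.h now hands back ready-to-use separable OGLProgram objects, and ProgramManager plugs them into a single program pipeline with glUseProgramStages. A condensed sketch of that flow, assuming a GL 4.5+ context; the handles here are raw GLuints rather than the OGL* RAII wrappers, and MakePresentPipeline is an illustrative name only:

#include <glad/glad.h>

// Builds a pipeline from two already-linked separable programs (vertex + fragment)
// and binds it, loosely mirroring BindPresentPrograms in gl_shader_manager.h.
inline GLuint MakePresentPipeline(GLuint vertex_program, GLuint fragment_program) {
    GLuint pipeline{};
    glCreateProgramPipelines(1, &pipeline);
    glUseProgramStages(pipeline, GL_VERTEX_SHADER_BIT, vertex_program);
    glUseProgramStages(pipeline, GL_FRAGMENT_SHADER_BIT, fragment_program);
    // Unused stage bits stay at program 0, so tessellation and geometry are effectively disabled.
    glBindProgramPipeline(pipeline);
    return pipeline;
}

Because each program is linked with GL_PROGRAM_SEPARABLE, the pipeline can mix stages from different programs without a final link step, which is the property the shader manager relies on when it swaps individual stages per draw.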
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index dbdf5230f..586da84e3 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) {
83 FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); 83 FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
84} 84}
85 85
86void SetupDirtyShaders(Tables& tables) {
87 FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
88 Shaders);
89}
90
91void SetupDirtyPolygonModes(Tables& tables) { 86void SetupDirtyPolygonModes(Tables& tables) {
92 tables[0][OFF(polygon_mode_front)] = PolygonModeFront; 87 tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
93 tables[0][OFF(polygon_mode_back)] = PolygonModeBack; 88 tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
@@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
217 SetupDirtyScissors(tables); 212 SetupDirtyScissors(tables);
218 SetupDirtyVertexInstances(tables); 213 SetupDirtyVertexInstances(tables);
219 SetupDirtyVertexFormat(tables); 214 SetupDirtyVertexFormat(tables);
220 SetupDirtyShaders(tables);
221 SetupDirtyPolygonModes(tables); 215 SetupDirtyPolygonModes(tables);
222 SetupDirtyDepthTest(tables); 216 SetupDirtyDepthTest(tables);
223 SetupDirtyStencilTest(tables); 217 SetupDirtyStencilTest(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 94c905116..5864c7c07 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -52,7 +52,6 @@ enum : u8 {
52 BlendState0, 52 BlendState0,
53 BlendState7 = BlendState0 + 7, 53 BlendState7 = BlendState0 + 7,
54 54
55 Shaders,
56 ClipDistances, 55 ClipDistances,
57 56
58 PolygonModes, 57 PolygonModes,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index ff0f03e99..c373c9cb4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,9 +24,7 @@
24#include "video_core/textures/decoders.h" 24#include "video_core/textures/decoders.h"
25 25
26namespace OpenGL { 26namespace OpenGL {
27
28namespace { 27namespace {
29
30using Tegra::Texture::SwizzleSource; 28using Tegra::Texture::SwizzleSource;
31using Tegra::Texture::TextureMipmapFilter; 29using Tegra::Texture::TextureMipmapFilter;
32using Tegra::Texture::TextureType; 30using Tegra::Texture::TextureType;
@@ -59,107 +57,6 @@ struct CopyRegion {
59 GLsizei depth; 57 GLsizei depth;
60}; 58};
61 59
62struct FormatTuple {
63 GLenum internal_format;
64 GLenum format = GL_NONE;
65 GLenum type = GL_NONE;
66};
67
68constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
69 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
70 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
71 {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
72 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
73 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
74 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
75 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
76 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
77 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
78 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
79 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
80 {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
81 {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
82 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
83 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
84 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
85 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
86 {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
87 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
88 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
89 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
90 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
91 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
92 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
93 {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
94 {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
95 {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
96 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
97 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
98 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
99 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
100 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
101 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
102 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
103 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
104 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
105 {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
106 {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
107 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
108 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
109 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
110 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
111 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
112 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
113 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
114 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
115 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
116 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
117 {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
118 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
119 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
120 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
121 {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
122 {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
123 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
124 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
125 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
126 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
127 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
128 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
129 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
130 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
131 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
132 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
133 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
134 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
135 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
136 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
137 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
138 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
139 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
140 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
141 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
142 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
143 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
144 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
145 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
146 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
147 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
148 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
149 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
150 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
151 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
152 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
153 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
154 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
155 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
156 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
157 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
158 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
159 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
160 GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
161}};
162
163constexpr std::array ACCELERATED_FORMATS{ 60constexpr std::array ACCELERATED_FORMATS{
164 GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, 61 GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
165 GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, 62 GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
@@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
170 GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, 67 GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
171}; 68};
172 69
173const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
174 ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
175 return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
176}
177
178GLenum ImageTarget(const VideoCommon::ImageInfo& info) { 70GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
179 switch (info.type) { 71 switch (info.type) {
180 case ImageType::e1D: 72 case ImageType::e1D:
@@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
195 return GL_NONE; 87 return GL_NONE;
196} 88}
197 89
198GLenum ImageTarget(ImageViewType type, int num_samples = 1) { 90GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
199 const bool is_multisampled = num_samples > 1; 91 const bool is_multisampled = num_samples > 1;
200 switch (type) { 92 switch (type) {
201 case ImageViewType::e1D: 93 case Shader::TextureType::Color1D:
202 return GL_TEXTURE_1D; 94 return GL_TEXTURE_1D;
203 case ImageViewType::e2D: 95 case Shader::TextureType::Color2D:
204 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; 96 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
205 case ImageViewType::Cube: 97 case Shader::TextureType::ColorCube:
206 return GL_TEXTURE_CUBE_MAP; 98 return GL_TEXTURE_CUBE_MAP;
207 case ImageViewType::e3D: 99 case Shader::TextureType::Color3D:
208 return GL_TEXTURE_3D; 100 return GL_TEXTURE_3D;
209 case ImageViewType::e1DArray: 101 case Shader::TextureType::ColorArray1D:
210 return GL_TEXTURE_1D_ARRAY; 102 return GL_TEXTURE_1D_ARRAY;
211 case ImageViewType::e2DArray: 103 case Shader::TextureType::ColorArray2D:
212 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; 104 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
213 case ImageViewType::CubeArray: 105 case Shader::TextureType::ColorArrayCube:
214 return GL_TEXTURE_CUBE_MAP_ARRAY; 106 return GL_TEXTURE_CUBE_MAP_ARRAY;
215 case ImageViewType::Rect: 107 case Shader::TextureType::Buffer:
216 return GL_TEXTURE_RECTANGLE;
217 case ImageViewType::Buffer:
218 return GL_TEXTURE_BUFFER; 108 return GL_TEXTURE_BUFFER;
219 } 109 }
220 UNREACHABLE_MSG("Invalid image view type={}", type); 110 UNREACHABLE_MSG("Invalid image view type={}", type);
@@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
322 default: 212 default:
323 return false; 213 return false;
324 } 214 }
325 const GLenum internal_format = GetFormatTuple(info.format).internal_format; 215 const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
326 const auto& format_info = runtime.FormatInfo(info.type, internal_format); 216 const auto& format_info = runtime.FormatInfo(info.type, internal_format);
327 if (format_info.is_compressed) { 217 if (format_info.is_compressed) {
328 return false; 218 return false;
@@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
414 304
415void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { 305void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
416 if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { 306 if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
417 const GLuint texture = image_view->DefaultHandle(); 307 glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
418 glNamedFramebufferTexture(fbo, attachment, texture, 0);
419 return; 308 return;
420 } 309 }
421 const GLuint texture = image_view->Handle(ImageViewType::e3D); 310 const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
422 if (image_view->range.extent.layers > 1) { 311 if (image_view->range.extent.layers > 1) {
423 // TODO: OpenGL doesn't support rendering to a fixed number of slices 312 // TODO: OpenGL doesn't support rendering to a fixed number of slices
424 glNamedFramebufferTexture(fbo, attachment, texture, 0); 313 glNamedFramebufferTexture(fbo, attachment, texture, 0);
@@ -439,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
439 } 328 }
440} 329}
441 330
331[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) {
332 switch (format) {
333 case Shader::ImageFormat::Typeless:
334 break;
335 case Shader::ImageFormat::R8_SINT:
336 return GL_R8I;
337 case Shader::ImageFormat::R8_UINT:
338 return GL_R8UI;
339 case Shader::ImageFormat::R16_UINT:
340 return GL_R16UI;
341 case Shader::ImageFormat::R16_SINT:
342 return GL_R16I;
343 case Shader::ImageFormat::R32_UINT:
344 return GL_R32UI;
345 case Shader::ImageFormat::R32G32_UINT:
346 return GL_RG32UI;
347 case Shader::ImageFormat::R32G32B32A32_UINT:
348 return GL_RGBA32UI;
349 }
350 UNREACHABLE_MSG("Invalid image format={}", format);
351 return GL_R32UI;
352}
442} // Anonymous namespace 353} // Anonymous namespace
443 354
444ImageBufferMap::~ImageBufferMap() { 355ImageBufferMap::~ImageBufferMap() {
@@ -453,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
453 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; 364 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
454 for (size_t i = 0; i < TARGETS.size(); ++i) { 365 for (size_t i = 0; i < TARGETS.size(); ++i) {
455 const GLenum target = TARGETS[i]; 366 const GLenum target = TARGETS[i];
456 for (const FormatTuple& tuple : FORMAT_TABLE) { 367 for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
457 const GLenum format = tuple.internal_format; 368 const GLenum format = tuple.internal_format;
458 GLint compat_class; 369 GLint compat_class;
459 GLint compat_type; 370 GLint compat_type;
@@ -475,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
475 null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); 386 null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
476 null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); 387 null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
477 null_image_3d.Create(GL_TEXTURE_3D); 388 null_image_3d.Create(GL_TEXTURE_3D);
478 null_image_rect.Create(GL_TEXTURE_RECTANGLE);
479 glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); 389 glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
480 glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); 390 glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
481 glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); 391 glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
482 glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
483 392
484 std::array<GLuint, 4> new_handles; 393 std::array<GLuint, 4> new_handles;
485 glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); 394 glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
@@ -496,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
496 glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, 405 glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
497 GL_R8, 0, 1, 0, 6); 406 GL_R8, 0, 1, 0, 6);
498 const std::array texture_handles{ 407 const std::array texture_handles{
499 null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, 408 null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
500 null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, 409 null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
501 null_image_view_2d_array.handle, null_image_view_cube.handle, 410 null_image_view_cube.handle,
502 }; 411 };
503 for (const GLuint handle : texture_handles) { 412 for (const GLuint handle : texture_handles) {
504 static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; 413 static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
505 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); 414 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
506 } 415 }
507 const auto set_view = [this](ImageViewType type, GLuint handle) { 416 const auto set_view = [this](Shader::TextureType type, GLuint handle) {
508 if (device.HasDebuggingToolAttached()) { 417 if (device.HasDebuggingToolAttached()) {
509 const std::string name = fmt::format("NullImage {}", type); 418 const std::string name = fmt::format("NullImage {}", type);
510 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); 419 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
511 } 420 }
512 null_image_views[static_cast<size_t>(type)] = handle; 421 null_image_views[static_cast<size_t>(type)] = handle;
513 }; 422 };
514 set_view(ImageViewType::e1D, null_image_view_1d.handle); 423 set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
515 set_view(ImageViewType::e2D, null_image_view_2d.handle); 424 set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
516 set_view(ImageViewType::Cube, null_image_view_cube.handle); 425 set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
517 set_view(ImageViewType::e3D, null_image_3d.handle); 426 set_view(Shader::TextureType::Color3D, null_image_3d.handle);
518 set_view(ImageViewType::e1DArray, null_image_1d_array.handle); 427 set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
519 set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); 428 set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
520 set_view(ImageViewType::CubeArray, null_image_cube_array.handle); 429 set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
521 set_view(ImageViewType::Rect, null_image_rect.handle);
522} 430}
523 431
524TextureCacheRuntime::~TextureCacheRuntime() = default; 432TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -710,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
710 gl_format = GL_RGBA; 618 gl_format = GL_RGBA;
711 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 619 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
712 } else { 620 } else {
713 const auto& tuple = GetFormatTuple(info.format); 621 const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
714 gl_internal_format = tuple.internal_format; 622 gl_internal_format = tuple.internal_format;
715 gl_format = tuple.format; 623 gl_format = tuple.format;
716 gl_type = tuple.type; 624 gl_type = tuple.type;
@@ -750,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
750 glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); 658 glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
751 break; 659 break;
752 case GL_TEXTURE_BUFFER: 660 case GL_TEXTURE_BUFFER:
753 buffer.Create(); 661 UNREACHABLE();
754 glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
755 break; 662 break;
756 default: 663 default:
757 UNREACHABLE_MSG("Invalid target=0x{:x}", target); 664 UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@@ -789,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
789 } 696 }
790} 697}
791 698
792void Image::UploadMemory(const ImageBufferMap& map,
793 std::span<const VideoCommon::BufferCopy> copies) {
794 for (const VideoCommon::BufferCopy& copy : copies) {
795 glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
796 copy.dst_offset, copy.size);
797 }
798}
799
800void Image::DownloadMemory(ImageBufferMap& map, 699void Image::DownloadMemory(ImageBufferMap& map,
801 std::span<const VideoCommon::BufferImageCopy> copies) { 700 std::span<const VideoCommon::BufferImageCopy> copies) {
802 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API 701 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@@ -958,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
958 if (True(image.flags & ImageFlagBits::Converted)) { 857 if (True(image.flags & ImageFlagBits::Converted)) {
959 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 858 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
960 } else { 859 } else {
961 internal_format = GetFormatTuple(format).internal_format; 860 internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
861 }
862 full_range = info.range;
863 flat_range = info.range;
864 set_object_label = device.HasDebuggingToolAttached();
865 is_render_target = info.IsRenderTarget();
866 original_texture = image.texture.handle;
867 num_samples = image.info.num_samples;
868 if (!is_render_target) {
869 swizzle[0] = info.x_source;
870 swizzle[1] = info.y_source;
871 swizzle[2] = info.z_source;
872 swizzle[3] = info.w_source;
962 } 873 }
963 VideoCommon::SubresourceRange flatten_range = info.range;
964 std::array<GLuint, 2> handles;
965 stored_views.reserve(2);
966
967 switch (info.type) { 874 switch (info.type) {
968 case ImageViewType::e1DArray: 875 case ImageViewType::e1DArray:
969 flatten_range.extent.layers = 1; 876 flat_range.extent.layers = 1;
970 [[fallthrough]]; 877 [[fallthrough]];
971 case ImageViewType::e1D: 878 case ImageViewType::e1D:
972 glGenTextures(2, handles.data()); 879 SetupView(Shader::TextureType::Color1D);
973 SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); 880 SetupView(Shader::TextureType::ColorArray1D);
974 SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
975 break; 881 break;
976 case ImageViewType::e2DArray: 882 case ImageViewType::e2DArray:
977 flatten_range.extent.layers = 1; 883 flat_range.extent.layers = 1;
978 [[fallthrough]]; 884 [[fallthrough]];
979 case ImageViewType::e2D: 885 case ImageViewType::e2D:
980 if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { 886 if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
@@ -984,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
984 .base = {.level = info.range.base.level, .layer = 0}, 890 .base = {.level = info.range.base.level, .layer = 0},
985 .extent = {.levels = 1, .layers = 1}, 891 .extent = {.levels = 1, .layers = 1},
986 }; 892 };
987 glGenTextures(1, handles.data()); 893 full_range = slice_range;
988 SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); 894
989 break; 895 SetupView(Shader::TextureType::Color3D);
896 } else {
897 SetupView(Shader::TextureType::Color2D);
898 SetupView(Shader::TextureType::ColorArray2D);
990 } 899 }
991 glGenTextures(2, handles.data());
992 SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
993 SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
994 break; 900 break;
995 case ImageViewType::e3D: 901 case ImageViewType::e3D:
996 glGenTextures(1, handles.data()); 902 SetupView(Shader::TextureType::Color3D);
997 SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
998 break; 903 break;
999 case ImageViewType::CubeArray: 904 case ImageViewType::CubeArray:
1000 flatten_range.extent.layers = 6; 905 flat_range.extent.layers = 6;
1001 [[fallthrough]]; 906 [[fallthrough]];
1002 case ImageViewType::Cube: 907 case ImageViewType::Cube:
1003 glGenTextures(2, handles.data()); 908 SetupView(Shader::TextureType::ColorCube);
1004 SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); 909 SetupView(Shader::TextureType::ColorArrayCube);
1005 SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
1006 break; 910 break;
1007 case ImageViewType::Rect: 911 case ImageViewType::Rect:
1008 glGenTextures(1, handles.data()); 912 UNIMPLEMENTED();
1009 SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
1010 break; 913 break;
1011 case ImageViewType::Buffer: 914 case ImageViewType::Buffer:
1012 glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); 915 UNREACHABLE();
1013 SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); 916 break;
917 }
918 switch (info.type) {
919 case ImageViewType::e1D:
920 default_handle = Handle(Shader::TextureType::Color1D);
921 break;
922 case ImageViewType::e1DArray:
923 default_handle = Handle(Shader::TextureType::ColorArray1D);
924 break;
925 case ImageViewType::e2D:
926 default_handle = Handle(Shader::TextureType::Color2D);
927 break;
928 case ImageViewType::e2DArray:
929 default_handle = Handle(Shader::TextureType::ColorArray2D);
930 break;
931 case ImageViewType::e3D:
932 default_handle = Handle(Shader::TextureType::Color3D);
933 break;
934 case ImageViewType::Cube:
935 default_handle = Handle(Shader::TextureType::ColorCube);
936 break;
937 case ImageViewType::CubeArray:
938 default_handle = Handle(Shader::TextureType::ColorArrayCube);
939 break;
940 default:
1014 break; 941 break;
1015 } 942 }
1016 default_handle = Handle(info.type);
1017} 943}
1018 944
945ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
946 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
947 : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
948 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
949
950ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
951 const VideoCommon::ImageViewInfo& view_info)
952 : VideoCommon::ImageViewBase{info, view_info} {}
953
1019ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) 954ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
1020 : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} 955 : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
1021 956
1022void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, 957GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
1023 GLuint handle, const VideoCommon::ImageViewInfo& info, 958 if (image_format == Shader::ImageFormat::Typeless) {
1024 VideoCommon::SubresourceRange view_range) { 959 return Handle(texture_type);
1025 if (info.type == ImageViewType::Buffer) { 960 }
1026 // TODO: Take offset from buffer cache 961 const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
1027 glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, 962 image_format == Shader::ImageFormat::R16_SINT};
1028 image.guest_size_bytes); 963 if (!storage_views) {
1029 } else { 964 storage_views = std::make_unique<StorageViews>();
1030 const GLuint parent = image.texture.handle; 965 }
1031 const GLenum target = ImageTarget(view_type, image.info.num_samples); 966 auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds};
1032 glTextureView(handle, target, parent, internal_format, view_range.base.level, 967 GLuint& view{type_views[static_cast<size_t>(texture_type)]};
1033 view_range.extent.levels, view_range.base.layer, view_range.extent.layers); 968 if (view == 0) {
1034 if (!info.IsRenderTarget()) { 969 view = MakeView(texture_type, ShaderFormat(image_format));
1035 ApplySwizzle(handle, format, info.Swizzle()); 970 }
1036 } 971 return view;
972}
973
974void ImageView::SetupView(Shader::TextureType view_type) {
975 views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
976}
977
978GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
979 VideoCommon::SubresourceRange view_range;
980 switch (view_type) {
981 case Shader::TextureType::Color1D:
982 case Shader::TextureType::Color2D:
983 case Shader::TextureType::ColorCube:
984 view_range = flat_range;
985 break;
986 case Shader::TextureType::ColorArray1D:
987 case Shader::TextureType::ColorArray2D:
988 case Shader::TextureType::Color3D:
989 case Shader::TextureType::ColorArrayCube:
990 view_range = full_range;
991 break;
992 default:
993 UNREACHABLE();
1037 } 994 }
1038 if (device.HasDebuggingToolAttached()) { 995 OGLTextureView& view = stored_views.emplace_back();
1039 const std::string name = VideoCommon::Name(*this, view_type); 996 view.Create();
1040 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); 997
998 const GLenum target = ImageTarget(view_type, num_samples);
999 glTextureView(view.handle, target, original_texture, view_format, view_range.base.level,
1000 view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
1001 if (!is_render_target) {
1002 std::array<SwizzleSource, 4> casted_swizzle;
1003 std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) {
1004 return static_cast<SwizzleSource>(component_swizzle);
1005 });
1006 ApplySwizzle(view.handle, format, casted_swizzle);
1007 }
1008 if (set_object_label) {
1009 const std::string name = VideoCommon::Name(*this);
1010 glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
1041 } 1011 }
1042 stored_views.emplace_back().handle = handle; 1012 return view.handle;
1043 views[static_cast<size_t>(view_type)] = handle;
1044} 1013}
1045 1014
1046Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { 1015Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
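MakeView above creates one immutable texture view per Shader::TextureType over the original storage, picking either the flattened or the full subresource range and reapplying the guest swizzle. A trimmed sketch of the core glTextureView call, assuming a GL 4.3+ context with ARB_texture_view and an already-allocated parent texture; MakeLayerView is a hypothetical helper, not the yuzu function:

#include <glad/glad.h>

// Creates a texture view covering `levels` mip levels and `layers` array layers of
// `parent`, reinterpreted with `view_format` and exposed through `target`
// (e.g. GL_TEXTURE_2D_ARRAY for Shader::TextureType::ColorArray2D).
inline GLuint MakeLayerView(GLuint parent, GLenum target, GLenum view_format, GLuint base_level,
                            GLuint levels, GLuint base_layer, GLuint layers) {
    GLuint view{};
    // Texture views must be created on a fresh, never-bound name from glGenTextures.
    glGenTextures(1, &view);
    glTextureView(view, target, parent, view_format, base_level, levels, base_layer, layers);
    return view;
}

In the cache, the resulting handle lands in views[static_cast<size_t>(view_type)], so binding a texture later is a plain array lookup keyed by the texture type the shader declared.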
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index cf3b789e3..921072ebe 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -9,6 +9,7 @@
9 9
10#include <glad/glad.h> 10#include <glad/glad.h>
11 11
12#include "shader_recompiler/shader_info.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/util_shaders.h" 14#include "video_core/renderer_opengl/util_shaders.h"
14#include "video_core/texture_cache/texture_cache.h" 15#include "video_core/texture_cache/texture_cache.h"
@@ -127,13 +128,12 @@ private:
127 OGLTexture null_image_1d_array; 128 OGLTexture null_image_1d_array;
128 OGLTexture null_image_cube_array; 129 OGLTexture null_image_cube_array;
129 OGLTexture null_image_3d; 130 OGLTexture null_image_3d;
130 OGLTexture null_image_rect;
131 OGLTextureView null_image_view_1d; 131 OGLTextureView null_image_view_1d;
132 OGLTextureView null_image_view_2d; 132 OGLTextureView null_image_view_2d;
133 OGLTextureView null_image_view_2d_array; 133 OGLTextureView null_image_view_2d_array;
134 OGLTextureView null_image_view_cube; 134 OGLTextureView null_image_view_cube;
135 135
136 std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; 136 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
137}; 137};
138 138
139class Image : public VideoCommon::ImageBase { 139class Image : public VideoCommon::ImageBase {
@@ -154,8 +154,6 @@ public:
154 void UploadMemory(const ImageBufferMap& map, 154 void UploadMemory(const ImageBufferMap& map,
155 std::span<const VideoCommon::BufferImageCopy> copies); 155 std::span<const VideoCommon::BufferImageCopy> copies);
156 156
157 void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
158
159 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); 157 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
160 158
161 GLuint StorageHandle() noexcept; 159 GLuint StorageHandle() noexcept;
@@ -170,7 +168,6 @@ private:
170 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); 168 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
171 169
172 OGLTexture texture; 170 OGLTexture texture;
173 OGLBuffer buffer;
174 OGLTextureView store_view; 171 OGLTextureView store_view;
175 GLenum gl_internal_format = GL_NONE; 172 GLenum gl_internal_format = GL_NONE;
176 GLenum gl_format = GL_NONE; 173 GLenum gl_format = GL_NONE;
@@ -182,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase {
182 179
183public: 180public:
184 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); 181 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
182 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
183 const VideoCommon::ImageViewInfo&, GPUVAddr);
184 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
185 const VideoCommon::ImageViewInfo& view_info);
185 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); 186 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
186 187
187 [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { 188 [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
188 return views[static_cast<size_t>(query_type)]; 189 Shader::ImageFormat image_format);
190
191 [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
192 return views[static_cast<size_t>(handle_type)];
189 } 193 }
190 194
191 [[nodiscard]] GLuint DefaultHandle() const noexcept { 195 [[nodiscard]] GLuint DefaultHandle() const noexcept {
@@ -196,15 +200,38 @@ public:
196 return internal_format; 200 return internal_format;
197 } 201 }
198 202
203 [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
204 return gpu_addr;
205 }
206
207 [[nodiscard]] u32 BufferSize() const noexcept {
208 return buffer_size;
209 }
210
199private: 211private:
200 void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, 212 struct StorageViews {
201 const VideoCommon::ImageViewInfo& info, 213 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{};
202 VideoCommon::SubresourceRange view_range); 214 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{};
215 };
216
217 void SetupView(Shader::TextureType view_type);
218
219 GLuint MakeView(Shader::TextureType view_type, GLenum view_format);
203 220
204 std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; 221 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
205 std::vector<OGLTextureView> stored_views; 222 std::vector<OGLTextureView> stored_views;
206 GLuint default_handle = 0; 223 std::unique_ptr<StorageViews> storage_views;
207 GLenum internal_format = GL_NONE; 224 GLenum internal_format = GL_NONE;
225 GLuint default_handle = 0;
226 GPUVAddr gpu_addr = 0;
227 u32 buffer_size = 0;
228 GLuint original_texture = 0;
229 int num_samples = 0;
230 VideoCommon::SubresourceRange flat_range;
231 VideoCommon::SubresourceRange full_range;
232 std::array<u8, 4> swizzle{};
233 bool set_object_label = false;
234 bool is_render_target = false;
208}; 235};
209 236
210class ImageAlloc : public VideoCommon::ImageAllocBase {}; 237class ImageAlloc : public VideoCommon::ImageAllocBase {};
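ImageView::StorageView lazily allocates a StorageViews block holding one signed and one unsigned handle per texture type, and only builds a reinterpreting view the first time a given (type, signedness) pair is requested. A small sketch of that lazy two-array cache, assuming some fixed number of texture types and a MakeView-like factory; all names here are illustrative only:

#include <array>
#include <cstddef>
#include <functional>
#include <memory>
#include <glad/glad.h>

constexpr std::size_t kNumTextureTypes = 9; // assumed stand-in for Shader::NUM_TEXTURE_TYPES

struct StorageViewCache {
    struct Views {
        std::array<GLuint, kNumTextureTypes> signeds{};
        std::array<GLuint, kNumTextureTypes> unsigneds{};
    };

    // `make_view` stands in for ImageView::MakeView(view_type, ShaderFormat(image_format)).
    GLuint Get(std::size_t type, bool is_signed, const std::function<GLuint()>& make_view) {
        if (!views) {
            views = std::make_unique<Views>(); // allocated only once storage images are used
        }
        auto& table = is_signed ? views->signeds : views->unsigneds;
        GLuint& handle = table[type];
        if (handle == 0) {
            handle = make_view(); // build the reinterpreting view on first use
        }
        return handle;
    }

    std::unique_ptr<Views> views;
};

Keeping the block behind a unique_ptr means image views never used as storage images pay only a null pointer, which matters because most views in a frame are sampled rather than written.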
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index f7ad8f370..672f94bfc 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -5,12 +5,120 @@
5#pragma once 5#pragma once
6 6
7#include <glad/glad.h> 7#include <glad/glad.h>
8
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/surface.h"
9 11
10namespace OpenGL::MaxwellToGL { 12namespace OpenGL::MaxwellToGL {
11 13
12using Maxwell = Tegra::Engines::Maxwell3D::Regs; 14using Maxwell = Tegra::Engines::Maxwell3D::Regs;
13 15
16struct FormatTuple {
17 GLenum internal_format;
18 GLenum format = GL_NONE;
19 GLenum type = GL_NONE;
20};
21
22constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
23 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
24 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
25 {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
26 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
27 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
28 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
29 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
30 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
31 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
32 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
33 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
34 {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
35 {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
36 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
37 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
38 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
39 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
40 {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
41 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
42 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
43 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
44 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
45 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
46 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
47 {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
48 {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
49 {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
50 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
51 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
52 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
53 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
54 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
55 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
56 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
57 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
58 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
59 {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
60 {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
61 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
62 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
63 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
64 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
65 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
66 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
67 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
68 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
69 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
70 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
71 {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
72 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
73 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
74 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
75 {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
76 {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
77 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
78 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
79 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
80 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
81 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
82 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
83 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
84 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
85 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
86 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
87 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
88 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
89 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
90 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
91 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
92 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
93 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
94 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
95 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
96 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
97 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
98 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
99 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
100 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
101 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
102 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
103 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
104 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
105 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
106 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
107 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
108 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
109 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
110 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
111 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
112 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
113 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
114 GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
115}};
116
117inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
118 ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
119 return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
120}
121
14inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { 122inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
15 switch (attrib.type) { 123 switch (attrib.type) {
16 case Maxwell::VertexAttribute::Type::UnsignedNorm: 124 case Maxwell::VertexAttribute::Type::UnsignedNorm:
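Note: GetFormatTuple introduced above is a plain table lookup keyed by the pixel format enum; compressed entries fill only internal_format and rely on the GL_NONE defaults. A small standalone sketch of the same pattern (the enum and the three sample entries are illustrative, with raw GL constants spelled out so the snippet compiles without glad):

    #include <array>
    #include <cassert>
    #include <cstddef>

    enum class PixelFormat : std::size_t { A8B8G8R8_UNORM, R8_UNORM, BC1_RGBA_UNORM, MaxPixelFormat };

    struct FormatTuple {
        unsigned internal_format;
        unsigned format = 0; // stays 0 (GL_NONE) for compressed formats
        unsigned type = 0;
    };

    constexpr std::array<FormatTuple, static_cast<std::size_t>(PixelFormat::MaxPixelFormat)> FORMAT_TABLE{{
        {0x8058, 0x1908, 0x8367}, // GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV
        {0x8229, 0x1903, 0x1401}, // GL_R8, GL_RED, GL_UNSIGNED_BYTE
        {0x83F1},                 // GL_COMPRESSED_RGBA_S3TC_DXT1_EXT (format/type unused)
    }};

    inline const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
        assert(static_cast<std::size_t>(pixel_format) < FORMAT_TABLE.size());
        return FORMAT_TABLE[static_cast<std::size_t>(pixel_format)];
    }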
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index c12929de6..285e78384 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -25,6 +25,7 @@
25#include "video_core/host_shaders/opengl_present_vert.h" 25#include "video_core/host_shaders/opengl_present_vert.h"
26#include "video_core/renderer_opengl/gl_rasterizer.h" 26#include "video_core/renderer_opengl/gl_rasterizer.h"
27#include "video_core/renderer_opengl/gl_shader_manager.h" 27#include "video_core/renderer_opengl/gl_shader_manager.h"
28#include "video_core/renderer_opengl/gl_shader_util.h"
28#include "video_core/renderer_opengl/renderer_opengl.h" 29#include "video_core/renderer_opengl/renderer_opengl.h"
29#include "video_core/textures/decoders.h" 30#include "video_core/textures/decoders.h"
30 31
@@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
139 } 140 }
140 AddTelemetryFields(); 141 AddTelemetryFields();
141 InitOpenGLObjects(); 142 InitOpenGLObjects();
143
144 // Initialize default attributes to match hardware's disabled attributes
145 GLint max_attribs{};
146 glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs);
147 for (GLint attrib = 0; attrib < max_attribs; ++attrib) {
148 glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f);
149 }
150 // Enable seamless cubemaps when per texture parameters are not available
151 if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
152 glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
153 }
154 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
155 if (device.HasVertexBufferUnifiedMemory()) {
156 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
157 glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
158
159 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
160 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
161 &vertex_buffer_address);
162 }
142} 163}
143 164
144RendererOpenGL::~RendererOpenGL() = default; 165RendererOpenGL::~RendererOpenGL() = default;
@@ -230,18 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
230 251
231void RendererOpenGL::InitOpenGLObjects() { 252void RendererOpenGL::InitOpenGLObjects() {
232 // Create shader programs 253 // Create shader programs
233 OGLShader vertex_shader; 254 present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
234 vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); 255 present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
235
236 OGLShader fragment_shader;
237 fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
238
239 vertex_program.Create(true, false, vertex_shader.handle);
240 fragment_program.Create(true, false, fragment_shader.handle);
241
242 pipeline.Create();
243 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
244 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
245 256
246 // Generate presentation sampler 257 // Generate presentation sampler
247 present_sampler.Create(); 258 present_sampler.Create();
@@ -263,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() {
263 274
264 // Clear screen to black 275 // Clear screen to black
265 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 276 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
266
267 // Enable seamless cubemaps when per texture parameters are not available
268 if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
269 glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
270 }
271
272 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
273 if (device.HasVertexBufferUnifiedMemory()) {
274 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
275 glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
276
277 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
278 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
279 &vertex_buffer_address);
280 }
281} 277}
282 278
283void RendererOpenGL::AddTelemetryFields() { 279void RendererOpenGL::AddTelemetryFields() {
@@ -342,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
342 // Set projection matrix 338 // Set projection matrix
343 const std::array ortho_matrix = 339 const std::array ortho_matrix =
344 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); 340 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
345 glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, 341 program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
346 std::data(ortho_matrix)); 342 glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
343 ortho_matrix.data());
347 344
348 const auto& texcoords = screen_info.display_texcoords; 345 const auto& texcoords = screen_info.display_texcoords;
349 auto left = texcoords.left; 346 auto left = texcoords.left;
@@ -404,8 +401,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
404 state_tracker.NotifyClipControl(); 401 state_tracker.NotifyClipControl();
405 state_tracker.NotifyAlphaTest(); 402 state_tracker.NotifyAlphaTest();
406 403
407 program_manager.BindHostPipeline(pipeline.handle);
408
409 state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); 404 state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
410 glEnable(GL_CULL_FACE); 405 glEnable(GL_CULL_FACE);
411 if (screen_info.display_srgb) { 406 if (screen_info.display_srgb) {
@@ -453,7 +448,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
453 glClear(GL_COLOR_BUFFER_BIT); 448 glClear(GL_COLOR_BUFFER_BIT);
454 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 449 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
455 450
456 program_manager.RestoreGuestPipeline(); 451 // TODO
452 // program_manager.RestoreGuestPipeline();
457} 453}
458 454
459void RendererOpenGL::RenderScreenshot() { 455void RendererOpenGL::RenderScreenshot() {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 0b66f8332..d455f572f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,7 +12,6 @@
12#include "video_core/renderer_opengl/gl_device.h" 12#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_rasterizer.h" 13#include "video_core/renderer_opengl/gl_rasterizer.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_shader_manager.h"
16#include "video_core/renderer_opengl/gl_state_tracker.h" 15#include "video_core/renderer_opengl/gl_state_tracker.h"
17 16
18namespace Core { 17namespace Core {
@@ -111,9 +110,8 @@ private:
111 // OpenGL object IDs 110 // OpenGL object IDs
112 OGLSampler present_sampler; 111 OGLSampler present_sampler;
113 OGLBuffer vertex_buffer; 112 OGLBuffer vertex_buffer;
114 OGLProgram vertex_program; 113 OGLProgram present_vertex;
115 OGLProgram fragment_program; 114 OGLProgram present_fragment;
116 OGLPipeline pipeline;
117 OGLFramebuffer screenshot_framebuffer; 115 OGLFramebuffer screenshot_framebuffer;
118 116
119 // GPU address of the vertex buffer 117 // GPU address of the vertex buffer
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 8fb5be393..37a4d1d9d 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -16,8 +16,8 @@
16#include "video_core/host_shaders/opengl_copy_bc4_comp.h" 16#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
17#include "video_core/host_shaders/opengl_copy_bgra_comp.h" 17#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
18#include "video_core/host_shaders/pitch_unswizzle_comp.h" 18#include "video_core/host_shaders/pitch_unswizzle_comp.h"
19#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/renderer_opengl/gl_shader_manager.h" 19#include "video_core/renderer_opengl/gl_shader_manager.h"
20#include "video_core/renderer_opengl/gl_shader_util.h"
21#include "video_core/renderer_opengl/gl_texture_cache.h" 21#include "video_core/renderer_opengl/gl_texture_cache.h"
22#include "video_core/renderer_opengl/util_shaders.h" 22#include "video_core/renderer_opengl/util_shaders.h"
23#include "video_core/texture_cache/accelerated_swizzle.h" 23#include "video_core/texture_cache/accelerated_swizzle.h"
@@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
41using VideoCore::Surface::BytesPerBlock; 41using VideoCore::Surface::BytesPerBlock;
42 42
43namespace { 43namespace {
44
45OGLProgram MakeProgram(std::string_view source) { 44OGLProgram MakeProgram(std::string_view source) {
46 OGLShader shader; 45 return CreateProgram(source, GL_COMPUTE_SHADER);
47 shader.Create(source, GL_COMPUTE_SHADER);
48
49 OGLProgram program;
50 program.Create(true, false, shader.handle);
51 return program;
52} 46}
53 47
54size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { 48size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
55 return static_cast<size_t>(copy.extent.width * copy.extent.height * 49 return static_cast<size_t>(copy.extent.width * copy.extent.height *
56 copy.src_subresource.num_layers); 50 copy.src_subresource.num_layers);
57} 51}
58
59} // Anonymous namespace 52} // Anonymous namespace
60 53
61UtilShaders::UtilShaders(ProgramManager& program_manager_) 54UtilShaders::UtilShaders(ProgramManager& program_manager_)
@@ -86,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
86 .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), 79 .width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
87 .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), 80 .height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
88 }; 81 };
89 program_manager.BindHostCompute(astc_decoder_program.handle); 82 program_manager.BindComputeProgram(astc_decoder_program.handle);
90 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 83 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
91 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); 84 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
92 85
@@ -134,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
134 static constexpr GLuint BINDING_INPUT_BUFFER = 1; 127 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
135 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 128 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
136 129
137 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); 130 program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
138 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 131 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
139 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 132 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
140 133
@@ -173,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
173 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 166 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
174 167
175 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 168 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
176 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); 169 program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
177 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 170 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
178 171
179 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); 172 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@@ -222,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
222 UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), 215 UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
223 "Non-power of two images are not implemented"); 216 "Non-power of two images are not implemented");
224 217
225 program_manager.BindHostCompute(pitch_unswizzle_program.handle); 218 program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
226 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 219 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
227 glUniform2ui(LOC_ORIGIN, 0, 0); 220 glUniform2ui(LOC_ORIGIN, 0, 0);
228 glUniform2i(LOC_DESTINATION, 0, 0); 221 glUniform2i(LOC_DESTINATION, 0, 0);
@@ -250,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
250 static constexpr GLuint LOC_SRC_OFFSET = 0; 243 static constexpr GLuint LOC_SRC_OFFSET = 0;
251 static constexpr GLuint LOC_DST_OFFSET = 1; 244 static constexpr GLuint LOC_DST_OFFSET = 1;
252 245
253 program_manager.BindHostCompute(copy_bc4_program.handle); 246 program_manager.BindComputeProgram(copy_bc4_program.handle);
254 247
255 for (const ImageCopy& copy : copies) { 248 for (const ImageCopy& copy : copies) {
256 ASSERT(copy.src_subresource.base_layer == 0); 249 ASSERT(copy.src_subresource.base_layer == 0);
@@ -286,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
286 break; 279 break;
287 case 4: { 280 case 4: {
288 // BGRA8 copy 281 // BGRA8 copy
289 program_manager.BindHostCompute(copy_bgra_program.handle); 282 program_manager.BindComputeProgram(copy_bgra_program.handle);
290 constexpr GLenum FORMAT = GL_RGBA8; 283 constexpr GLenum FORMAT = GL_RGBA8;
291 for (const ImageCopy& copy : copies) { 284 for (const ImageCopy& copy : copies) {
292 ASSERT(copy.src_offset == zero_offset); 285 ASSERT(copy.src_offset == zero_offset);
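Note: MakeProgram above collapses to a single CreateProgram(source, GL_COMPUTE_SHADER) call from the new gl_shader_util helpers. Roughly what a helper like that does with raw GL calls (error handling trimmed; this is a sketch, not yuzu's gl_shader_util implementation):

    // Assumes <glad/glad.h> and an active GL context.
    GLuint MakeComputeProgram(const char* source) {
        const GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
        glShaderSource(shader, 1, &source, nullptr);
        glCompileShader(shader);

        const GLuint program = glCreateProgram();
        glAttachShader(program, shader);
        glLinkProgram(program);
        glDeleteShader(shader); // the program keeps the linked binary
        return program;
    }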
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b7f5b8bc2..6c1b2f063 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA
49 .bindingCount = 1, 49 .bindingCount = 1,
50 .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, 50 .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
51}; 51};
52template <u32 num_textures>
53inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{
54 .uniform_buffers = 0,
55 .storage_buffers = 0,
56 .texture_buffers = 0,
57 .image_buffers = 0,
58 .textures = num_textures,
59 .images = 0,
60 .score = 2,
61};
52constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ 62constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
53 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 63 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
54 .pNext = nullptr, 64 .pNext = nullptr,
@@ -323,18 +333,19 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
323 cmdbuf.SetScissor(0, scissor); 333 cmdbuf.SetScissor(0, scissor);
324 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); 334 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
325} 335}
326
327} // Anonymous namespace 336} // Anonymous namespace
328 337
329BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, 338BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
330 StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) 339 StateTracker& state_tracker_, DescriptorPool& descriptor_pool)
331 : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, 340 : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
332 one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( 341 one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
333 ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), 342 ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
334 two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( 343 two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
335 TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), 344 TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
336 one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), 345 one_texture_descriptor_allocator{
337 two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), 346 descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)},
347 two_textures_descriptor_allocator{
348 descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)},
338 one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( 349 one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
339 PipelineLayoutCreateInfo(one_texture_set_layout.address()))), 350 PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
340 two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( 351 two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
@@ -362,14 +373,14 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
362 .operation = operation, 373 .operation = operation,
363 }; 374 };
364 const VkPipelineLayout layout = *one_texture_pipeline_layout; 375 const VkPipelineLayout layout = *one_texture_pipeline_layout;
365 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); 376 const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
366 const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; 377 const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
367 const VkPipeline pipeline = FindOrEmplacePipeline(key); 378 const VkPipeline pipeline = FindOrEmplacePipeline(key);
368 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
369 scheduler.RequestRenderpass(dst_framebuffer); 379 scheduler.RequestRenderpass(dst_framebuffer);
370 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, 380 scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
371 &device = device](vk::CommandBuffer cmdbuf) { 381 src_view](vk::CommandBuffer cmdbuf) {
372 // TODO: Barriers 382 // TODO: Barriers
383 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
373 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); 384 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
374 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 385 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
375 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, 386 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
@@ -391,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
391 const VkPipelineLayout layout = *two_textures_pipeline_layout; 402 const VkPipelineLayout layout = *two_textures_pipeline_layout;
392 const VkSampler sampler = *nearest_sampler; 403 const VkSampler sampler = *nearest_sampler;
393 const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); 404 const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
394 const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
395 scheduler.RequestRenderpass(dst_framebuffer); 405 scheduler.RequestRenderpass(dst_framebuffer);
396 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, 406 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
397 src_stencil_view, descriptor_set, 407 src_stencil_view, this](vk::CommandBuffer cmdbuf) {
398 &device = device](vk::CommandBuffer cmdbuf) {
399 // TODO: Barriers 408 // TODO: Barriers
409 const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
400 UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, 410 UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
401 src_stencil_view); 411 src_stencil_view);
402 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 412 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -416,7 +426,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
416 426
417void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, 427void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
418 const ImageView& src_image_view) { 428 const ImageView& src_image_view) {
419
420 ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); 429 ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
421 Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); 430 Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
422} 431}
@@ -436,16 +445,14 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
436void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, 445void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
437 const ImageView& src_image_view) { 446 const ImageView& src_image_view) {
438 const VkPipelineLayout layout = *one_texture_pipeline_layout; 447 const VkPipelineLayout layout = *one_texture_pipeline_layout;
439 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); 448 const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
440 const VkSampler sampler = *nearest_sampler; 449 const VkSampler sampler = *nearest_sampler;
441 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
442 const VkExtent2D extent{ 450 const VkExtent2D extent{
443 .width = src_image_view.size.width, 451 .width = src_image_view.size.width,
444 .height = src_image_view.size.height, 452 .height = src_image_view.size.height,
445 }; 453 };
446 scheduler.RequestRenderpass(dst_framebuffer); 454 scheduler.RequestRenderpass(dst_framebuffer);
447 scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, 455 scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
448 &device = device](vk::CommandBuffer cmdbuf) {
449 const VkOffset2D offset{ 456 const VkOffset2D offset{
450 .x = 0, 457 .x = 0,
451 .y = 0, 458 .y = 0,
@@ -466,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
466 .tex_scale = {viewport.width, viewport.height}, 473 .tex_scale = {viewport.width, viewport.height},
467 .tex_offset = {0.0f, 0.0f}, 474 .tex_offset = {0.0f, 0.0f},
468 }; 475 };
476 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
469 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); 477 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
470 478
471 // TODO: Barriers 479 // TODO: Barriers
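Note: in the blit_image.cpp hunks above, the descriptor allocator's Commit() moves from the call site into the scheduler.Record lambda, so the descriptor set is allocated when the recorded command runs rather than when it is queued. A toy standalone sketch of that deferral pattern (Scheduler and Allocator here are illustrative stand-ins, not yuzu's types):

    #include <functional>
    #include <queue>

    struct Allocator {
        int next = 0;
        int Commit() { return next++; } // pretend descriptor-set allocation
    };

    struct Scheduler {
        std::queue<std::function<void()>> work;
        void Record(std::function<void()> f) { work.push(std::move(f)); }
        void Flush() {
            while (!work.empty()) {
                work.front()();
                work.pop();
            }
        }
    };

    void Blit(Scheduler& scheduler, Allocator& allocator) {
        // Allocation happens inside the deferred lambda, at execution time.
        scheduler.Record([&allocator] {
            const int descriptor_set = allocator.Commit();
            (void)descriptor_set; // bind + draw would go here
        });
    }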
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 0d81a06ed..33ee095c1 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -31,7 +31,7 @@ struct BlitImagePipelineKey {
31class BlitImageHelper { 31class BlitImageHelper {
32public: 32public:
33 explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, 33 explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
34 StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); 34 StateTracker& state_tracker, DescriptorPool& descriptor_pool);
35 ~BlitImageHelper(); 35 ~BlitImageHelper();
36 36
37 void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, 37 void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 362278f01..d70153df3 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -15,9 +15,7 @@
15#include "video_core/renderer_vulkan/vk_state_tracker.h" 15#include "video_core/renderer_vulkan/vk_state_tracker.h"
16 16
17namespace Vulkan { 17namespace Vulkan {
18
19namespace { 18namespace {
20
21constexpr size_t POINT = 0; 19constexpr size_t POINT = 0;
22constexpr size_t LINE = 1; 20constexpr size_t LINE = 1;
23constexpr size_t POLYGON = 2; 21constexpr size_t POLYGON = 2;
@@ -39,10 +37,20 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
39 POLYGON, // Patches 37 POLYGON, // Patches
40}; 38};
41 39
40void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
41 std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
42 return VideoCommon::TransformFeedbackState::Layout{
43 .stream = layout.stream,
44 .varying_count = layout.varying_count,
45 .stride = layout.stride,
46 };
47 });
48 state.varyings = regs.tfb_varying_locs;
49}
42} // Anonymous namespace 50} // Anonymous namespace
43 51
44void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, 52void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
45 bool has_extended_dynamic_state) { 53 bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
46 const Maxwell& regs = maxwell3d.regs; 54 const Maxwell& regs = maxwell3d.regs;
47 const std::array enabled_lut{ 55 const std::array enabled_lut{
48 regs.polygon_offset_point_enable, 56 regs.polygon_offset_point_enable,
@@ -52,6 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
52 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); 60 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
53 61
54 raw1 = 0; 62 raw1 = 0;
63 extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
64 dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
65 xfb_enabled.Assign(regs.tfb_enabled != 0);
55 primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); 66 primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
56 depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); 67 depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
57 depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); 68 depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
@@ -63,37 +74,66 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
63 tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); 74 tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
64 logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); 75 logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
65 logic_op.Assign(PackLogicOp(regs.logic_op.operation)); 76 logic_op.Assign(PackLogicOp(regs.logic_op.operation));
66 rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
67 topology.Assign(regs.draw.topology); 77 topology.Assign(regs.draw.topology);
68 msaa_mode.Assign(regs.multisample_mode); 78 msaa_mode.Assign(regs.multisample_mode);
69 79
70 raw2 = 0; 80 raw2 = 0;
81 rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
71 const auto test_func = 82 const auto test_func =
72 regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; 83 regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
73 alpha_test_func.Assign(PackComparisonOp(test_func)); 84 alpha_test_func.Assign(PackComparisonOp(test_func));
74 early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); 85 early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
75 86 depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
87 depth_format.Assign(static_cast<u32>(regs.zeta.format));
88 y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0);
89 provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0);
90 conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0);
91 smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0);
92
93 for (size_t i = 0; i < regs.rt.size(); ++i) {
94 color_formats[i] = static_cast<u8>(regs.rt[i].format);
95 }
76 alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); 96 alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
77 point_size = Common::BitCast<u32>(regs.point_size); 97 point_size = Common::BitCast<u32>(regs.point_size);
78 98
79 if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { 99 if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
80 maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; 100 if (has_dynamic_vertex_input) {
81 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 101 // Dirty flag will be reset by the command buffer update
82 const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); 102 static constexpr std::array LUT{
83 binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; 103 0u, // Invalid
84 } 104 1u, // SignedNorm
85 } 105 1u, // UnsignedNorm
86 if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { 106 2u, // SignedInt
87 maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; 107 3u, // UnsignedInt
88 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { 108 1u, // UnsignedScaled
89 const auto& input = regs.vertex_attrib_format[index]; 109 1u, // SignedScaled
90 auto& attribute = attributes[index]; 110 1u, // Float
91 attribute.raw = 0; 111 };
92 attribute.enabled.Assign(input.IsConstant() ? 0 : 1); 112 const auto& attrs = regs.vertex_attrib_format;
93 attribute.buffer.Assign(input.buffer); 113 attribute_types = 0;
94 attribute.offset.Assign(input.offset); 114 for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
95 attribute.type.Assign(static_cast<u32>(input.type.Value())); 115 const u32 mask = attrs[i].constant != 0 ? 0 : 3;
96 attribute.size.Assign(static_cast<u32>(input.size.Value())); 116 const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())];
117 attribute_types |= static_cast<u64>(type & mask) << (i * 2);
118 }
119 } else {
120 maxwell3d.dirty.flags[Dirty::VertexInput] = false;
121 enabled_divisors = 0;
122 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
123 const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
124 binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
125 enabled_divisors |= (is_enabled ? u64{1} : 0) << index;
126 }
127 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
128 const auto& input = regs.vertex_attrib_format[index];
129 auto& attribute = attributes[index];
130 attribute.raw = 0;
131 attribute.enabled.Assign(input.constant ? 0 : 1);
132 attribute.buffer.Assign(input.buffer);
133 attribute.offset.Assign(input.offset);
134 attribute.type.Assign(static_cast<u32>(input.type.Value()));
135 attribute.size.Assign(static_cast<u32>(input.size.Value()));
136 }
97 } 137 }
98 } 138 }
99 if (maxwell3d.dirty.flags[Dirty::Blending]) { 139 if (maxwell3d.dirty.flags[Dirty::Blending]) {
@@ -109,10 +149,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
109 return static_cast<u16>(viewport.swizzle.raw); 149 return static_cast<u16>(viewport.swizzle.raw);
110 }); 150 });
111 } 151 }
112 if (!has_extended_dynamic_state) { 152 if (!extended_dynamic_state) {
113 no_extended_dynamic_state.Assign(1);
114 dynamic_state.Refresh(regs); 153 dynamic_state.Refresh(regs);
115 } 154 }
155 if (xfb_enabled) {
156 RefreshXfbState(xfb_state, regs);
157 }
116} 158}
117 159
118void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { 160void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
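Note: RefreshXfbState above copies the hardware transform feedback layouts into the cached state with std::ranges::transform. A minimal standalone example of that idiom (struct names and values are illustrative):

    #include <algorithm>
    #include <array>
    #include <cstdint>

    struct HwLayout { std::uint32_t stream, varying_count, stride; };
    struct Layout   { std::uint32_t stream, varying_count, stride; };

    int main() {
        const std::array<HwLayout, 4> regs{{{0, 4, 16}, {1, 2, 8}, {0, 0, 0}, {0, 0, 0}}};
        std::array<Layout, 4> state{};
        // Element-wise conversion, same shape as the RefreshXfbState lambda above.
        std::ranges::transform(regs, state.begin(), [](const HwLayout& layout) {
            return Layout{
                .stream = layout.stream,
                .varying_count = layout.varying_count,
                .stride = layout.stride,
            };
        });
    }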
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index a0eb83a68..c9be37935 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -12,6 +12,7 @@
12 12
13#include "video_core/engines/maxwell_3d.h" 13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/surface.h" 14#include "video_core/surface.h"
15#include "video_core/transform_feedback.h"
15 16
16namespace Vulkan { 17namespace Vulkan {
17 18
@@ -60,7 +61,7 @@ struct FixedPipelineState {
60 61
61 void Refresh(const Maxwell& regs, size_t index); 62 void Refresh(const Maxwell& regs, size_t index);
62 63
63 constexpr std::array<bool, 4> Mask() const noexcept { 64 std::array<bool, 4> Mask() const noexcept {
64 return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; 65 return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
65 } 66 }
66 67
@@ -97,11 +98,11 @@ struct FixedPipelineState {
97 BitField<20, 3, u32> type; 98 BitField<20, 3, u32> type;
98 BitField<23, 6, u32> size; 99 BitField<23, 6, u32> size;
99 100
100 constexpr Maxwell::VertexAttribute::Type Type() const noexcept { 101 Maxwell::VertexAttribute::Type Type() const noexcept {
101 return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); 102 return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
102 } 103 }
103 104
104 constexpr Maxwell::VertexAttribute::Size Size() const noexcept { 105 Maxwell::VertexAttribute::Size Size() const noexcept {
105 return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); 106 return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
106 } 107 }
107 }; 108 };
@@ -167,37 +168,53 @@ struct FixedPipelineState {
167 168
168 union { 169 union {
169 u32 raw1; 170 u32 raw1;
170 BitField<0, 1, u32> no_extended_dynamic_state; 171 BitField<0, 1, u32> extended_dynamic_state;
171 BitField<2, 1, u32> primitive_restart_enable; 172 BitField<1, 1, u32> dynamic_vertex_input;
172 BitField<3, 1, u32> depth_bias_enable; 173 BitField<2, 1, u32> xfb_enabled;
173 BitField<4, 1, u32> depth_clamp_disabled; 174 BitField<3, 1, u32> primitive_restart_enable;
174 BitField<5, 1, u32> ndc_minus_one_to_one; 175 BitField<4, 1, u32> depth_bias_enable;
175 BitField<6, 2, u32> polygon_mode; 176 BitField<5, 1, u32> depth_clamp_disabled;
176 BitField<8, 5, u32> patch_control_points_minus_one; 177 BitField<6, 1, u32> ndc_minus_one_to_one;
177 BitField<13, 2, u32> tessellation_primitive; 178 BitField<7, 2, u32> polygon_mode;
178 BitField<15, 2, u32> tessellation_spacing; 179 BitField<9, 5, u32> patch_control_points_minus_one;
179 BitField<17, 1, u32> tessellation_clockwise; 180 BitField<14, 2, u32> tessellation_primitive;
180 BitField<18, 1, u32> logic_op_enable; 181 BitField<16, 2, u32> tessellation_spacing;
181 BitField<19, 4, u32> logic_op; 182 BitField<18, 1, u32> tessellation_clockwise;
182 BitField<23, 1, u32> rasterize_enable; 183 BitField<19, 1, u32> logic_op_enable;
184 BitField<20, 4, u32> logic_op;
183 BitField<24, 4, Maxwell::PrimitiveTopology> topology; 185 BitField<24, 4, Maxwell::PrimitiveTopology> topology;
184 BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; 186 BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
185 }; 187 };
186 union { 188 union {
187 u32 raw2; 189 u32 raw2;
188 BitField<0, 3, u32> alpha_test_func; 190 BitField<0, 1, u32> rasterize_enable;
189 BitField<3, 1, u32> early_z; 191 BitField<1, 3, u32> alpha_test_func;
192 BitField<4, 1, u32> early_z;
193 BitField<5, 1, u32> depth_enabled;
194 BitField<6, 5, u32> depth_format;
195 BitField<11, 1, u32> y_negate;
196 BitField<12, 1, u32> provoking_vertex_last;
197 BitField<13, 1, u32> conservative_raster_enable;
198 BitField<14, 1, u32> smooth_lines;
190 }; 199 };
200 std::array<u8, Maxwell::NumRenderTargets> color_formats;
191 201
192 u32 alpha_test_ref; 202 u32 alpha_test_ref;
193 u32 point_size; 203 u32 point_size;
194 std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
195 std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
196 std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; 204 std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
197 std::array<u16, Maxwell::NumViewports> viewport_swizzles; 205 std::array<u16, Maxwell::NumViewports> viewport_swizzles;
206 union {
207 u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
208 u64 enabled_divisors;
209 };
210 std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
211 std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
212
198 DynamicState dynamic_state; 213 DynamicState dynamic_state;
214 VideoCommon::TransformFeedbackState xfb_state;
199 215
200 void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); 216 void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
217 bool has_dynamic_vertex_input);
201 218
202 size_t Hash() const noexcept; 219 size_t Hash() const noexcept;
203 220
@@ -208,8 +225,24 @@ struct FixedPipelineState {
208 } 225 }
209 226
210 size_t Size() const noexcept { 227 size_t Size() const noexcept {
211 const size_t total_size = sizeof *this; 228 if (xfb_enabled) {
212 return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); 229 // When transform feedback is enabled, use the whole struct
230 return sizeof(*this);
231 }
232 if (dynamic_vertex_input) {
233 // Exclude dynamic state and attributes
234 return offsetof(FixedPipelineState, attributes);
235 }
236 if (extended_dynamic_state) {
237 // Exclude dynamic state
238 return offsetof(FixedPipelineState, dynamic_state);
239 }
240 // Default
241 return offsetof(FixedPipelineState, xfb_state);
242 }
243
244 u32 DynamicAttributeType(size_t index) const noexcept {
245 return (attribute_types >> (index * 2)) & 0b11;
213 } 246 }
214}; 247};
215static_assert(std::has_unique_object_representations_v<FixedPipelineState>); 248static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
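Note: with VK_EXT_vertex_input_dynamic_state the pipeline key stores only 2 bits per vertex attribute in attribute_types, and Size() trims the hashed prefix with offsetof. A standalone sketch of the 2-bit pack/unpack that DynamicAttributeType performs (attribute count and type codes are illustrative):

    #include <cstddef>
    #include <cstdint>

    // Pack one 2-bit type code per vertex attribute into a u64 (up to 32 attributes).
    constexpr std::uint64_t PackAttributeTypes(const std::uint32_t (&types)[32]) {
        std::uint64_t packed = 0;
        for (std::size_t i = 0; i < 32; ++i) {
            packed |= static_cast<std::uint64_t>(types[i] & 0b11) << (i * 2);
        }
        return packed;
    }

    // Mirror of DynamicAttributeType: extract the 2-bit code for one attribute.
    constexpr std::uint32_t DynamicAttributeType(std::uint64_t packed, std::size_t index) {
        return static_cast<std::uint32_t>((packed >> (index * 2)) & 0b11);
    }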
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f088447e9..68a23b602 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -157,7 +157,7 @@ struct FormatTuple {
157 {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT 157 {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT
158 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT 158 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT
159 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM 159 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM
160 {VK_FORMAT_UNDEFINED}, // R16_SNORM 160 {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM
161 {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT 161 {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT
162 {VK_FORMAT_UNDEFINED}, // R16_SINT 162 {VK_FORMAT_UNDEFINED}, // R16_SINT
163 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM 163 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
@@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
266 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; 266 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
267} 267}
268 268
269VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { 269VkShaderStageFlagBits ShaderStage(Shader::Stage stage) {
270 switch (stage) { 270 switch (stage) {
271 case Tegra::Engines::ShaderType::Vertex: 271 case Shader::Stage::VertexA:
272 case Shader::Stage::VertexB:
272 return VK_SHADER_STAGE_VERTEX_BIT; 273 return VK_SHADER_STAGE_VERTEX_BIT;
273 case Tegra::Engines::ShaderType::TesselationControl: 274 case Shader::Stage::TessellationControl:
274 return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; 275 return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
275 case Tegra::Engines::ShaderType::TesselationEval: 276 case Shader::Stage::TessellationEval:
276 return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; 277 return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
277 case Tegra::Engines::ShaderType::Geometry: 278 case Shader::Stage::Geometry:
278 return VK_SHADER_STAGE_GEOMETRY_BIT; 279 return VK_SHADER_STAGE_GEOMETRY_BIT;
279 case Tegra::Engines::ShaderType::Fragment: 280 case Shader::Stage::Fragment:
280 return VK_SHADER_STAGE_FRAGMENT_BIT; 281 return VK_SHADER_STAGE_FRAGMENT_BIT;
281 case Tegra::Engines::ShaderType::Compute: 282 case Shader::Stage::Compute:
282 return VK_SHADER_STAGE_COMPUTE_BIT; 283 return VK_SHADER_STAGE_COMPUTE_BIT;
283 } 284 }
284 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); 285 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage);
@@ -685,6 +686,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) {
685 return {}; 686 return {};
686} 687}
687 688
689VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) {
690 switch (polygon_mode) {
691 case Maxwell::PolygonMode::Point:
692 return VK_POLYGON_MODE_POINT;
693 case Maxwell::PolygonMode::Line:
694 return VK_POLYGON_MODE_LINE;
695 case Maxwell::PolygonMode::Fill:
696 return VK_POLYGON_MODE_FILL;
697 }
698 UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode);
699 return {};
700}
701
688VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { 702VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
689 switch (swizzle) { 703 switch (swizzle) {
690 case Tegra::Texture::SwizzleSource::Zero: 704 case Tegra::Texture::SwizzleSource::Zero:
@@ -741,4 +755,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti
741 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; 755 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
742} 756}
743 757
758VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
759 switch (msaa_mode) {
760 case Tegra::Texture::MsaaMode::Msaa1x1:
761 return VK_SAMPLE_COUNT_1_BIT;
762 case Tegra::Texture::MsaaMode::Msaa2x1:
763 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
764 return VK_SAMPLE_COUNT_2_BIT;
765 case Tegra::Texture::MsaaMode::Msaa2x2:
766 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
767 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
768 return VK_SAMPLE_COUNT_4_BIT;
769 case Tegra::Texture::MsaaMode::Msaa4x2:
770 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
771 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
772 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
773 return VK_SAMPLE_COUNT_8_BIT;
774 case Tegra::Texture::MsaaMode::Msaa4x4:
775 return VK_SAMPLE_COUNT_16_BIT;
776 default:
777 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
778 return VK_SAMPLE_COUNT_1_BIT;
779 }
780}
781
744} // namespace Vulkan::MaxwellToVK 782} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index e3e06ba38..8a9616039 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "shader_recompiler/stage.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
9#include "video_core/surface.h" 10#include "video_core/surface.h"
10#include "video_core/textures/texture.h" 11#include "video_core/textures/texture.h"
@@ -45,7 +46,7 @@ struct FormatInfo {
45[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, 46[[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
46 PixelFormat pixel_format); 47 PixelFormat pixel_format);
47 48
48VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); 49VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
49 50
50VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); 51VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
51 52
@@ -65,10 +66,14 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face);
65 66
66VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); 67VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
67 68
69VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode);
70
68VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); 71VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
69 72
70VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); 73VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
71 74
72VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); 75VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
73 76
77VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
78
74} // namespace Vulkan::MaxwellToVK 79} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
new file mode 100644
index 000000000..4847db6b6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -0,0 +1,154 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8
9#include <boost/container/small_vector.hpp>
10
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "shader_recompiler/shader_info.h"
14#include "video_core/renderer_vulkan/vk_texture_cache.h"
15#include "video_core/renderer_vulkan/vk_update_descriptor.h"
16#include "video_core/texture_cache/texture_cache.h"
17#include "video_core/texture_cache/types.h"
18#include "video_core/textures/texture.h"
19#include "video_core/vulkan_common/vulkan_device.h"
20
21namespace Vulkan {
22
23class DescriptorLayoutBuilder {
24public:
25 DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
26
27 bool CanUsePushDescriptor() const noexcept {
28 return device->IsKhrPushDescriptorSupported() &&
29 num_descriptors <= device->MaxPushDescriptors();
30 }
31
32 vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const {
33 if (bindings.empty()) {
34 return nullptr;
35 }
36 const VkDescriptorSetLayoutCreateFlags flags =
37 use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0;
38 return device->GetLogical().CreateDescriptorSetLayout({
39 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
40 .pNext = nullptr,
41 .flags = flags,
42 .bindingCount = static_cast<u32>(bindings.size()),
43 .pBindings = bindings.data(),
44 });
45 }
46
47 vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
48 VkPipelineLayout pipeline_layout,
49 bool use_push_descriptor) const {
50 if (entries.empty()) {
51 return nullptr;
52 }
53 const VkDescriptorUpdateTemplateType type =
54 use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
55 : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
56 return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
57 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
58 .pNext = nullptr,
59 .flags = 0,
60 .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
61 .pDescriptorUpdateEntries = entries.data(),
62 .templateType = type,
63 .descriptorSetLayout = descriptor_set_layout,
64 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
65 .pipelineLayout = pipeline_layout,
66 .set = 0,
67 });
68 }
69
70 vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
71 return device->GetLogical().CreatePipelineLayout({
72 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
73 .pNext = nullptr,
74 .flags = 0,
75 .setLayoutCount = descriptor_set_layout ? 1U : 0U,
76 .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
77 .pushConstantRangeCount = 0,
78 .pPushConstantRanges = nullptr,
79 });
80 }
81
82 void Add(const Shader::Info& info, VkShaderStageFlags stage) {
83 Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
84 Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
85 Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
86 Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors);
87 Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors);
88 Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors);
89 }
90
91private:
92 template <typename Descriptors>
93 void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) {
94 const size_t num{descriptors.size()};
95 for (size_t i = 0; i < num; ++i) {
96 bindings.push_back({
97 .binding = binding,
98 .descriptorType = type,
99 .descriptorCount = descriptors[i].count,
100 .stageFlags = stage,
101 .pImmutableSamplers = nullptr,
102 });
103 entries.push_back({
104 .dstBinding = binding,
105 .dstArrayElement = 0,
106 .descriptorCount = descriptors[i].count,
107 .descriptorType = type,
108 .offset = offset,
109 .stride = sizeof(DescriptorUpdateEntry),
110 });
111 ++binding;
112 num_descriptors += descriptors[i].count;
113 offset += sizeof(DescriptorUpdateEntry);
114 }
115 }
116
117 const Device* device{};
118 boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
119 boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
120 u32 binding{};
121 u32 num_descriptors{};
122 size_t offset{};
123};
124
125inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
126 const ImageId*& image_view_ids, TextureCache& texture_cache,
127 VKUpdateDescriptorQueue& update_descriptor_queue) {
128 for (const auto& desc : info.texture_buffer_descriptors) {
129 image_view_ids += desc.count;
130 }
131 for (const auto& desc : info.image_buffer_descriptors) {
132 image_view_ids += desc.count;
133 }
134 for (const auto& desc : info.texture_descriptors) {
135 for (u32 index = 0; index < desc.count; ++index) {
136 const VkSampler sampler{*(samplers++)};
137 ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
138 const VkImageView vk_image_view{image_view.Handle(desc.type)};
139 update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
140 }
141 }
142 for (const auto& desc : info.image_descriptors) {
143 for (u32 index = 0; index < desc.count; ++index) {
144 ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
145 if (desc.is_written) {
146 texture_cache.MarkModification(image_view.image_id);
147 }
148 const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
149 update_descriptor_queue.AddImage(vk_image_view);
150 }
151 }
152}
153
154} // namespace Vulkan
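Editor's note: the DescriptorLayoutBuilder above packs one VkDescriptorSetLayoutBinding and one update-template entry per descriptor array in a shader's Shader::Info, advancing the binding index and the payload offset by sizeof(DescriptorUpdateEntry) each time, so a whole set can be written with a single template update. A usage sketch follows, mirroring how ComputePipeline drives this builder later in this diff; Device, Shader::Info and the vk:: wrappers are yuzu types, so the snippet is illustrative rather than standalone, and the trailing booleans stay false as in that code.

    // Sketch only: names and call order taken from ComputePipeline's constructor below.
    DescriptorLayoutBuilder builder{device};
    builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);  // a graphics pipeline would presumably call Add() once per active stage
    vk::DescriptorSetLayout descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
    vk::PipelineLayout pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
    vk::DescriptorUpdateTemplateKHR descriptor_update_template =
        builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
    DescriptorAllocator descriptor_allocator =
        descriptor_pool.Allocator(*descriptor_set_layout, info);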
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index bec3a81d9..a8d04dc61 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -130,35 +130,45 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
130 if (!framebuffer) { 130 if (!framebuffer) {
131 return; 131 return;
132 } 132 }
133 const auto& layout = render_window.GetFramebufferLayout(); 133 SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
134 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { 134 if (!render_window.IsShown()) {
135 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 135 return;
136 const bool use_accelerated = 136 }
137 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 137 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
138 const bool is_srgb = use_accelerated && screen_info.is_srgb; 138 const bool use_accelerated =
139 if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { 139 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
140 swapchain.Create(layout.width, layout.height, is_srgb); 140 const bool is_srgb = use_accelerated && screen_info.is_srgb;
141 blit_screen.Recreate(); 141
142 } 142 bool has_been_recreated = false;
143 143 const auto recreate_swapchain = [&] {
144 scheduler.WaitWorker(); 144 if (!has_been_recreated) {
145 145 has_been_recreated = true;
146 while (!swapchain.AcquireNextImage()) { 146 scheduler.WaitWorker();
147 swapchain.Create(layout.width, layout.height, is_srgb);
148 blit_screen.Recreate();
149 } 147 }
150 const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); 148 const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
151 149 swapchain.Create(layout.width, layout.height, is_srgb);
152 scheduler.Flush(render_semaphore); 150 };
153 151 if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) {
154 if (swapchain.Present(render_semaphore)) { 152 recreate_swapchain();
155 blit_screen.Recreate(); 153 }
154 bool is_outdated;
155 do {
156 swapchain.AcquireNextImage();
157 is_outdated = swapchain.IsOutDated();
158 if (is_outdated) {
159 recreate_swapchain();
156 } 160 }
157 gpu.RendererFrameEndNotify(); 161 } while (is_outdated);
158 rasterizer.TickFrame(); 162 if (has_been_recreated) {
163 blit_screen.Recreate();
159 } 164 }
165 const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
166 scheduler.Flush(render_semaphore);
167 scheduler.WaitWorker();
168 swapchain.Present(render_semaphore);
160 169
161 render_window.OnFrameDisplayed(); 170 gpu.RendererFrameEndNotify();
171 rasterizer.TickFrame();
162} 172}
163 173
164void RendererVulkan::Report() const { 174void RendererVulkan::Report() const {
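Editor's note: the reworked SwapBuffers path recreates the swapchain lazily. It recreates up front when the swapchain reports suboptimal or a color-space change, pays scheduler.WaitWorker() at most once, retries AcquireNextImage until the swapchain is no longer out of date, and rebuilds the blit screen only once at the end if anything changed. A minimal standalone sketch of that control flow, with the swapchain stubbed out (FakeSwapchain and its counter are invented here purely for illustration):

    #include <cstdio>

    struct FakeSwapchain {
        int outdated_acquires = 1;  // pretend the first acquire reports the image as out of date
        void Create() { std::puts("recreate swapchain"); }
        void AcquireNextImage() {}
        bool IsOutDated() { return outdated_acquires-- > 0; }
    };

    int main() {
        FakeSwapchain swapchain;
        bool has_been_recreated = false;
        const auto recreate_swapchain = [&] {
            if (!has_been_recreated) {
                has_been_recreated = true;  // the expensive WaitWorker() happens only once
            }
            swapchain.Create();
        };
        bool is_outdated;
        do {
            swapchain.AcquireNextImage();
            is_outdated = swapchain.IsOutDated();
            if (is_outdated) {
                recreate_swapchain();
            }
        } while (is_outdated);
        if (has_been_recreated) {
            std::puts("recreate blit screen framebuffers");  // BlitScreen::Recreate() in the real code
        }
    }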
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 363134129..516f428e7 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
184 .depth = 1, 184 .depth = 1,
185 }, 185 },
186 }; 186 };
187 scheduler.Record( 187 scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
188 [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { 188 const VkImage image = *raw_images[image_index];
189 const VkImageMemoryBarrier base_barrier{ 189 const VkImageMemoryBarrier base_barrier{
190 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 190 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
191 .pNext = nullptr, 191 .pNext = nullptr,
192 .srcAccessMask = 0, 192 .srcAccessMask = 0,
193 .dstAccessMask = 0, 193 .dstAccessMask = 0,
194 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, 194 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
195 .newLayout = VK_IMAGE_LAYOUT_GENERAL, 195 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
196 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 196 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
197 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 197 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
198 .image = image, 198 .image = image,
199 .subresourceRange = 199 .subresourceRange{
200 { 200 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
201 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 201 .baseMipLevel = 0,
202 .baseMipLevel = 0, 202 .levelCount = 1,
203 .levelCount = 1, 203 .baseArrayLayer = 0,
204 .baseArrayLayer = 0, 204 .layerCount = 1,
205 .layerCount = 1, 205 },
206 }, 206 };
207 }; 207 VkImageMemoryBarrier read_barrier = base_barrier;
208 VkImageMemoryBarrier read_barrier = base_barrier; 208 read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
209 read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; 209 read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
210 read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 210 read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
211 read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; 211
212 212 VkImageMemoryBarrier write_barrier = base_barrier;
213 VkImageMemoryBarrier write_barrier = base_barrier; 213 write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
214 write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 214 write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
215 write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; 215
216 216 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
217 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 217 read_barrier);
218 0, read_barrier); 218 cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
219 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); 219 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
220 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, 220 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
221 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); 221 });
222 });
223 } 222 }
224 scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], 223 scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) {
225 descriptor_set = descriptor_sets[image_index], buffer = *buffer,
226 size = swapchain.GetSize(), pipeline = *pipeline,
227 layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
228 const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; 224 const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
229 const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; 225 const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
230 const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; 226 const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
@@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
234 const VkRenderPassBeginInfo renderpass_bi{ 230 const VkRenderPassBeginInfo renderpass_bi{
235 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 231 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
236 .pNext = nullptr, 232 .pNext = nullptr,
237 .renderPass = renderpass, 233 .renderPass = *renderpass,
238 .framebuffer = framebuffer, 234 .framebuffer = *framebuffers[image_index],
239 .renderArea = 235 .renderArea =
240 { 236 {
241 .offset = {0, 0}, 237 .offset = {0, 0},
@@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
257 .extent = size, 253 .extent = size,
258 }; 254 };
259 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); 255 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
260 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 256 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
261 cmdbuf.SetViewport(0, viewport); 257 cmdbuf.SetViewport(0, viewport);
262 cmdbuf.SetScissor(0, scissor); 258 cmdbuf.SetScissor(0, scissor);
263 259
264 cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); 260 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
265 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); 261 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
262 descriptor_sets[image_index], {});
266 cmdbuf.Draw(4, 1, 0, 0); 263 cmdbuf.Draw(4, 1, 0, 0);
267 cmdbuf.EndRenderPass(); 264 cmdbuf.EndRenderPass();
268 }); 265 });
@@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() {
304 301
305void VKBlitScreen::CreateSemaphores() { 302void VKBlitScreen::CreateSemaphores() {
306 semaphores.resize(image_count); 303 semaphores.resize(image_count);
307 std::generate(semaphores.begin(), semaphores.end(), 304 std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
308 [this] { return device.GetLogical().CreateSemaphore(); });
309} 305}
310 306
311void VKBlitScreen::CreateDescriptorPool() { 307void VKBlitScreen::CreateDescriptorPool() {
@@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() {
633} 629}
634 630
635void VKBlitScreen::ReleaseRawImages() { 631void VKBlitScreen::ReleaseRawImages() {
636 for (std::size_t i = 0; i < raw_images.size(); ++i) { 632 for (const u64 tick : resource_ticks) {
637 scheduler.Wait(resource_ticks.at(i)); 633 scheduler.Wait(tick);
638 } 634 }
639 raw_images.clear(); 635 raw_images.clear();
640 raw_buffer_commits.clear(); 636 raw_buffer_commits.clear();
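Editor's note: the recorded upload in VKBlitScreen::Draw keeps its original barrier ordering even though the lambda now captures `this`: a host-write to transfer-write barrier before the buffer-to-image copy, then a transfer-write to shader-read barrier so the presentation fragment shader samples the copied data. A hedged sketch of that sandwich against the raw Vulkan API; the image stays in VK_IMAGE_LAYOUT_GENERAL as in the diff, and cmdbuf, buffer, image and copy are assumed to be valid.

    #include <vulkan/vulkan.h>

    // Assumed valid: a recording command buffer, a host-written staging buffer,
    // a GENERAL-layout color image and a filled VkBufferImageCopy region.
    void RecordUpload(VkCommandBuffer cmdbuf, VkBuffer buffer, VkImage image,
                      const VkBufferImageCopy& copy) {
        VkImageMemoryBarrier barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,  // contents are fully overwritten by the copy
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = image,
            .subresourceRange{
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .baseMipLevel = 0,
                .levelCount = 1,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
        };
        // Make host writes visible to the transfer stage before the copy.
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                             0, nullptr, 0, nullptr, 1, &barrier);
        vkCmdCopyBufferToImage(cmdbuf, buffer, image, VK_IMAGE_LAYOUT_GENERAL, 1, &copy);
        // Make the copy visible to fragment-shader reads in the presentation pass.
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0,
                             0, nullptr, 0, nullptr, 1, &barrier);
    }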
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 0def1e769..f4b3ee95c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -60,38 +60,74 @@ std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
60 } 60 }
61 return indices; 61 return indices;
62} 62}
63} // Anonymous namespace
64
65Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
66 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
67 63
68Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 64vk::Buffer CreateBuffer(const Device& device, u64 size) {
69 VAddr cpu_addr_, u64 size_bytes_) 65 VkBufferUsageFlags flags =
70 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { 66 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
71 buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ 67 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
68 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
69 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
70 if (device.IsExtTransformFeedbackSupported()) {
71 flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
72 }
73 return device.GetLogical().CreateBuffer({
72 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 74 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
73 .pNext = nullptr, 75 .pNext = nullptr,
74 .flags = 0, 76 .flags = 0,
75 .size = SizeBytes(), 77 .size = size,
76 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 78 .usage = flags,
77 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
78 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
79 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
80 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
81 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 79 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
82 .queueFamilyIndexCount = 0, 80 .queueFamilyIndexCount = 0,
83 .pQueueFamilyIndices = nullptr, 81 .pQueueFamilyIndices = nullptr,
84 }); 82 });
83}
84} // Anonymous namespace
85
86Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
87 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
88
89Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
90 VAddr cpu_addr_, u64 size_bytes_)
91 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
92 device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
93 commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
85 if (runtime.device.HasDebuggingToolAttached()) { 94 if (runtime.device.HasDebuggingToolAttached()) {
86 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); 95 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
87 } 96 }
88 commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); 97}
98
99VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
100 if (!device) {
101 // Null buffer, return a null descriptor
102 return VK_NULL_HANDLE;
103 }
104 const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
105 return offset == view.offset && size == view.size && format == view.format;
106 })};
107 if (it != views.end()) {
108 return *it->handle;
109 }
110 views.push_back({
111 .offset = offset,
112 .size = size,
113 .format = format,
114 .handle = device->GetLogical().CreateBufferView({
115 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
116 .pNext = nullptr,
117 .flags = 0,
118 .buffer = *buffer,
119 .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format,
120 .offset = offset,
121 .range = size,
122 }),
123 });
124 return *views.back().handle;
89} 125}
90 126
91BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, 127BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
92 VKScheduler& scheduler_, StagingBufferPool& staging_pool_, 128 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
93 VKUpdateDescriptorQueue& update_descriptor_queue_, 129 VKUpdateDescriptorQueue& update_descriptor_queue_,
94 VKDescriptorPool& descriptor_pool) 130 DescriptorPool& descriptor_pool)
95 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, 131 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
96 staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, 132 staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
97 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 133 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
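Editor's note: the new Buffer::View above caches one vk::BufferView per (offset, size, format) triple and only calls CreateBufferView on a miss, so repeated texel-buffer bindings of the same range reuse the first view. A standalone sketch of that lookup-or-create pattern with the Vulkan objects stubbed out; ViewKey, ViewCache and MakeView are invented names for illustration.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct ViewKey {
        std::uint32_t offset;
        std::uint32_t size;
        int format;
        int handle;  // stand-in for the vk::BufferView
    };

    class ViewCache {
    public:
        int Get(std::uint32_t offset, std::uint32_t size, int format) {
            const auto it = std::ranges::find_if(views, [&](const ViewKey& v) {
                return v.offset == offset && v.size == size && v.format == format;
            });
            if (it != views.end()) {
                return it->handle;  // cache hit: reuse the existing view
            }
            views.push_back({offset, size, format, MakeView()});
            return views.back().handle;  // cache miss: create and remember it
        }

    private:
        int MakeView() { return next_handle++; }  // vkCreateBufferView in the real code
        std::vector<ViewKey> views;
        int next_handle = 1;
    };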
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3bb81d5b3..c27402ff0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -9,13 +9,14 @@
9#include "video_core/renderer_vulkan/vk_compute_pass.h" 9#include "video_core/renderer_vulkan/vk_compute_pass.h"
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
11#include "video_core/renderer_vulkan/vk_update_descriptor.h" 11#include "video_core/renderer_vulkan/vk_update_descriptor.h"
12#include "video_core/surface.h"
12#include "video_core/vulkan_common/vulkan_memory_allocator.h" 13#include "video_core/vulkan_common/vulkan_memory_allocator.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
14 15
15namespace Vulkan { 16namespace Vulkan {
16 17
17class Device; 18class Device;
18class VKDescriptorPool; 19class DescriptorPool;
19class VKScheduler; 20class VKScheduler;
20 21
21class BufferCacheRuntime; 22class BufferCacheRuntime;
@@ -26,6 +27,8 @@ public:
26 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 27 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
27 VAddr cpu_addr_, u64 size_bytes_); 28 VAddr cpu_addr_, u64 size_bytes_);
28 29
30 [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
31
29 [[nodiscard]] VkBuffer Handle() const noexcept { 32 [[nodiscard]] VkBuffer Handle() const noexcept {
30 return *buffer; 33 return *buffer;
31 } 34 }
@@ -35,8 +38,17 @@ public:
35 } 38 }
36 39
37private: 40private:
41 struct BufferView {
42 u32 offset;
43 u32 size;
44 VideoCore::Surface::PixelFormat format;
45 vk::BufferView handle;
46 };
47
48 const Device* device{};
38 vk::Buffer buffer; 49 vk::Buffer buffer;
39 MemoryCommit commit; 50 MemoryCommit commit;
51 std::vector<BufferView> views;
40}; 52};
41 53
42class BufferCacheRuntime { 54class BufferCacheRuntime {
@@ -49,7 +61,7 @@ public:
49 explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, 61 explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
50 VKScheduler& scheduler_, StagingBufferPool& staging_pool_, 62 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
51 VKUpdateDescriptorQueue& update_descriptor_queue_, 63 VKUpdateDescriptorQueue& update_descriptor_queue_,
52 VKDescriptorPool& descriptor_pool); 64 DescriptorPool& descriptor_pool);
53 65
54 void Finish(); 66 void Finish();
55 67
@@ -87,6 +99,11 @@ public:
87 BindBuffer(buffer, offset, size); 99 BindBuffer(buffer, offset, size);
88 } 100 }
89 101
102 void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
103 VideoCore::Surface::PixelFormat format) {
104 update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
105 }
106
90private: 107private:
91 void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { 108 void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
92 update_descriptor_queue.AddBuffer(buffer, offset, size); 109 update_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -124,6 +141,7 @@ struct BufferCacheParams {
124 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; 141 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
125 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; 142 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
126 static constexpr bool USE_MEMORY_MAPS = true; 143 static constexpr bool USE_MEMORY_MAPS = true;
144 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
127}; 145};
128 146
129using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 147using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 4181d83ee..8e426ce2c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
41constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; 41constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
42constexpr size_t ASTC_NUM_BINDINGS = 4; 42constexpr size_t ASTC_NUM_BINDINGS = 4;
43 43
44VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { 44template <size_t size>
45 return { 45inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
46 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 46 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
47 .offset = 0, 47 .offset = 0,
48 .size = static_cast<u32>(size), 48 .size = static_cast<u32>(size),
49 }; 49};
50}
51
52std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
53 return {{
54 {
55 .binding = 0,
56 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
57 .descriptorCount = 1,
58 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
59 .pImmutableSamplers = nullptr,
60 },
61 {
62 .binding = 1,
63 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
64 .descriptorCount = 1,
65 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
66 .pImmutableSamplers = nullptr,
67 },
68 }};
69}
70 50
71std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() { 51constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{
72 return {{ 52 {
73 { 53 .binding = 0,
74 .binding = ASTC_BINDING_INPUT_BUFFER, 54 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
75 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 55 .descriptorCount = 1,
76 .descriptorCount = 1, 56 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
77 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 57 .pImmutableSamplers = nullptr,
78 .pImmutableSamplers = nullptr, 58 },
79 }, 59 {
80 { 60 .binding = 1,
81 .binding = ASTC_BINDING_ENC_BUFFER, 61 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
82 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 62 .descriptorCount = 1,
83 .descriptorCount = 1, 63 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
84 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 64 .pImmutableSamplers = nullptr,
85 .pImmutableSamplers = nullptr, 65 },
86 }, 66}};
87 { 67
88 .binding = ASTC_BINDING_SWIZZLE_BUFFER, 68constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
89 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 69 .uniform_buffers = 0,
90 .descriptorCount = 1, 70 .storage_buffers = 2,
91 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 71 .texture_buffers = 0,
92 .pImmutableSamplers = nullptr, 72 .image_buffers = 0,
93 }, 73 .textures = 0,
94 { 74 .images = 0,
95 .binding = ASTC_BINDING_OUTPUT_IMAGE, 75 .score = 2,
96 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 76};
97 .descriptorCount = 1,
98 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
99 .pImmutableSamplers = nullptr,
100 },
101 }};
102}
103 77
104VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { 78constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{
105 return { 79 {
106 .dstBinding = 0, 80 .binding = ASTC_BINDING_INPUT_BUFFER,
107 .dstArrayElement = 0,
108 .descriptorCount = 2,
109 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 81 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
110 .offset = 0, 82 .descriptorCount = 1,
111 .stride = sizeof(DescriptorUpdateEntry), 83 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
112 }; 84 .pImmutableSamplers = nullptr,
113} 85 },
86 {
87 .binding = ASTC_BINDING_ENC_BUFFER,
88 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
89 .descriptorCount = 1,
90 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
91 .pImmutableSamplers = nullptr,
92 },
93 {
94 .binding = ASTC_BINDING_SWIZZLE_BUFFER,
95 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
96 .descriptorCount = 1,
97 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
98 .pImmutableSamplers = nullptr,
99 },
100 {
101 .binding = ASTC_BINDING_OUTPUT_IMAGE,
102 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
103 .descriptorCount = 1,
104 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
105 .pImmutableSamplers = nullptr,
106 },
107}};
108
109constexpr DescriptorBankInfo ASTC_BANK_INFO{
110 .uniform_buffers = 0,
111 .storage_buffers = 3,
112 .texture_buffers = 0,
113 .image_buffers = 0,
114 .textures = 0,
115 .images = 1,
116 .score = 4,
117};
114 118
115std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> 119constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
116BuildASTCPassDescriptorUpdateTemplateEntry() { 120 .dstBinding = 0,
117 return {{ 121 .dstArrayElement = 0,
122 .descriptorCount = 2,
123 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
124 .offset = 0,
125 .stride = sizeof(DescriptorUpdateEntry),
126};
127
128constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
129 ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
118 { 130 {
119 .dstBinding = ASTC_BINDING_INPUT_BUFFER, 131 .dstBinding = ASTC_BINDING_INPUT_BUFFER,
120 .dstArrayElement = 0, 132 .dstArrayElement = 0,
@@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() {
148 .stride = sizeof(DescriptorUpdateEntry), 160 .stride = sizeof(DescriptorUpdateEntry),
149 }, 161 },
150 }}; 162 }};
151}
152 163
153struct AstcPushConstants { 164struct AstcPushConstants {
154 std::array<u32, 2> blocks_dims; 165 std::array<u32, 2> blocks_dims;
@@ -159,14 +170,14 @@ struct AstcPushConstants {
159 u32 block_height; 170 u32 block_height;
160 u32 block_height_mask; 171 u32 block_height_mask;
161}; 172};
162
163} // Anonymous namespace 173} // Anonymous namespace
164 174
165VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, 175ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
166 vk::Span<VkDescriptorSetLayoutBinding> bindings, 176 vk::Span<VkDescriptorSetLayoutBinding> bindings,
167 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 177 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
168 vk::Span<VkPushConstantRange> push_constants, 178 const DescriptorBankInfo& bank_info,
169 std::span<const u32> code) { 179 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
180 : device{device_} {
170 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ 181 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
171 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 182 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
172 .pNext = nullptr, 183 .pNext = nullptr,
@@ -196,8 +207,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
196 .pipelineLayout = *layout, 207 .pipelineLayout = *layout,
197 .set = 0, 208 .set = 0,
198 }); 209 });
199 210 descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info);
200 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
201 } 211 }
202 module = device.GetLogical().CreateShaderModule({ 212 module = device.GetLogical().CreateShaderModule({
203 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 213 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@@ -206,43 +216,34 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
206 .codeSize = static_cast<u32>(code.size_bytes()), 216 .codeSize = static_cast<u32>(code.size_bytes()),
207 .pCode = code.data(), 217 .pCode = code.data(),
208 }); 218 });
219 device.SaveShader(code);
209 pipeline = device.GetLogical().CreateComputePipeline({ 220 pipeline = device.GetLogical().CreateComputePipeline({
210 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 221 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
211 .pNext = nullptr, 222 .pNext = nullptr,
212 .flags = 0, 223 .flags = 0,
213 .stage = 224 .stage{
214 { 225 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
215 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 226 .pNext = nullptr,
216 .pNext = nullptr, 227 .flags = 0,
217 .flags = 0, 228 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
218 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 229 .module = *module,
219 .module = *module, 230 .pName = "main",
220 .pName = "main", 231 .pSpecializationInfo = nullptr,
221 .pSpecializationInfo = nullptr, 232 },
222 },
223 .layout = *layout, 233 .layout = *layout,
224 .basePipelineHandle = nullptr, 234 .basePipelineHandle = nullptr,
225 .basePipelineIndex = 0, 235 .basePipelineIndex = 0,
226 }); 236 });
227} 237}
228 238
229VKComputePass::~VKComputePass() = default; 239ComputePass::~ComputePass() = default;
230 240
231VkDescriptorSet VKComputePass::CommitDescriptorSet( 241Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_,
232 VKUpdateDescriptorQueue& update_descriptor_queue) { 242 DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
233 if (!descriptor_template) {
234 return nullptr;
235 }
236 const VkDescriptorSet set = descriptor_allocator->Commit();
237 update_descriptor_queue.Send(*descriptor_template, set);
238 return set;
239}
240
241Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
242 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
243 VKUpdateDescriptorQueue& update_descriptor_queue_) 243 VKUpdateDescriptorQueue& update_descriptor_queue_)
244 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), 244 : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
245 BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), 245 INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
246 VULKAN_UINT8_COMP_SPV),
246 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 247 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
247 update_descriptor_queue{update_descriptor_queue_} {} 248 update_descriptor_queue{update_descriptor_queue_} {}
248 249
@@ -256,11 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
256 update_descriptor_queue.Acquire(); 257 update_descriptor_queue.Acquire();
257 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); 258 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
258 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); 259 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
259 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 260 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
261 const VkBuffer buffer{staging.buffer};
260 262
261 scheduler.RequestOutsideRenderPassOperationContext(); 263 scheduler.RequestOutsideRenderPassOperationContext();
262 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, 264 scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
263 num_vertices](vk::CommandBuffer cmdbuf) {
264 static constexpr u32 DISPATCH_SIZE = 1024; 265 static constexpr u32 DISPATCH_SIZE = 1024;
265 static constexpr VkMemoryBarrier WRITE_BARRIER{ 266 static constexpr VkMemoryBarrier WRITE_BARRIER{
266 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 267 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -268,8 +269,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
268 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, 269 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
269 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, 270 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
270 }; 271 };
271 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 272 const VkDescriptorSet set = descriptor_allocator.Commit();
272 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 273 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
274 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
275 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
273 cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); 276 cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
274 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 277 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
275 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); 278 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
@@ -278,12 +281,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
278} 281}
279 282
280QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 283QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
281 VKDescriptorPool& descriptor_pool_, 284 DescriptorPool& descriptor_pool_,
282 StagingBufferPool& staging_buffer_pool_, 285 StagingBufferPool& staging_buffer_pool_,
283 VKUpdateDescriptorQueue& update_descriptor_queue_) 286 VKUpdateDescriptorQueue& update_descriptor_queue_)
284 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), 287 : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
285 BuildInputOutputDescriptorUpdateTemplate(), 288 INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
286 BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), 289 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
287 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 290 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
288 update_descriptor_queue{update_descriptor_queue_} {} 291 update_descriptor_queue{update_descriptor_queue_} {}
289 292
@@ -313,11 +316,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
313 update_descriptor_queue.Acquire(); 316 update_descriptor_queue.Acquire();
314 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); 317 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
315 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); 318 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
316 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 319 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
317 320
318 scheduler.RequestOutsideRenderPassOperationContext(); 321 scheduler.RequestOutsideRenderPassOperationContext();
319 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, 322 scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex,
320 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { 323 index_shift](vk::CommandBuffer cmdbuf) {
321 static constexpr u32 DISPATCH_SIZE = 1024; 324 static constexpr u32 DISPATCH_SIZE = 1024;
322 static constexpr VkMemoryBarrier WRITE_BARRIER{ 325 static constexpr VkMemoryBarrier WRITE_BARRIER{
323 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 326 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -325,10 +328,12 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
325 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, 328 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
326 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, 329 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
327 }; 330 };
328 const std::array push_constants = {base_vertex, index_shift}; 331 const std::array push_constants{base_vertex, index_shift};
329 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 332 const VkDescriptorSet set = descriptor_allocator.Commit();
330 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 333 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
331 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), 334 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
335 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
336 cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
332 &push_constants); 337 &push_constants);
333 cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); 338 cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
334 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 339 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
@@ -338,15 +343,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
338} 343}
339 344
340ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, 345ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
341 VKDescriptorPool& descriptor_pool_, 346 DescriptorPool& descriptor_pool_,
342 StagingBufferPool& staging_buffer_pool_, 347 StagingBufferPool& staging_buffer_pool_,
343 VKUpdateDescriptorQueue& update_descriptor_queue_, 348 VKUpdateDescriptorQueue& update_descriptor_queue_,
344 MemoryAllocator& memory_allocator_) 349 MemoryAllocator& memory_allocator_)
345 : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), 350 : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
346 BuildASTCPassDescriptorUpdateTemplateEntry(), 351 ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
347 BuildComputePushConstantRange(sizeof(AstcPushConstants)), 352 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
348 ASTC_DECODER_COMP_SPV), 353 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
349 device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
350 update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} 354 update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
351 355
352ASTCDecoderPass::~ASTCDecoderPass() = default; 356ASTCDecoderPass::~ASTCDecoderPass() = default;
@@ -444,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
444 update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), 448 update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
445 sizeof(SWIZZLE_TABLE)); 449 sizeof(SWIZZLE_TABLE));
446 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); 450 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
447 451 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
448 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
449 const VkPipelineLayout vk_layout = *layout;
450 452
451 // To unswizzle the ASTC data 453 // To unswizzle the ASTC data
452 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); 454 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
453 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); 455 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
454 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); 456 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
455 scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, 457 scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
456 block_dims, params, set](vk::CommandBuffer cmdbuf) { 458 params, descriptor_data](vk::CommandBuffer cmdbuf) {
457 const AstcPushConstants uniforms{ 459 const AstcPushConstants uniforms{
458 .blocks_dims = block_dims, 460 .blocks_dims = block_dims,
459 .bytes_per_block_log2 = params.bytes_per_block_log2, 461 .bytes_per_block_log2 = params.bytes_per_block_log2,
@@ -463,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
463 .block_height = params.block_height, 465 .block_height = params.block_height,
464 .block_height_mask = params.block_height_mask, 466 .block_height_mask = params.block_height_mask,
465 }; 467 };
466 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); 468 const VkDescriptorSet set = descriptor_allocator.Commit();
467 cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); 469 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
470 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
471 cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
468 cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); 472 cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
469 }); 473 });
470 } 474 }
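Editor's note: earlier in this file's diff, the runtime BuildComputePushConstantRange helper is replaced with a variable template, COMPUTE_PUSH_CONSTANT_RANGE<size>, giving each pass a distinct constexpr range at compile time. A standalone sketch of that idiom with the Vulkan struct stubbed out; PushConstantRange and kComputeStage are stand-ins, and 0x20 is the value of VK_SHADER_STAGE_COMPUTE_BIT.

    #include <cstddef>
    #include <cstdint>

    struct PushConstantRange {  // stand-in for VkPushConstantRange
        std::uint32_t stage_flags;
        std::uint32_t offset;
        std::uint32_t size;
    };
    inline constexpr std::uint32_t kComputeStage = 0x20;  // VK_SHADER_STAGE_COMPUTE_BIT

    // One constexpr range is instantiated per distinct compile-time size.
    template <std::size_t Size>
    inline constexpr PushConstantRange kComputePushConstantRange{
        .stage_flags = kComputeStage,
        .offset = 0,
        .size = static_cast<std::uint32_t>(Size),
    };

    // Usage mirrors the pass constructors above, e.g.:
    //   kComputePushConstantRange<sizeof(std::uint32_t) * 2>   (quad indexed pass)
    //   kComputePushConstantRange<sizeof(AstcPushConstants)>   (ASTC decoder pass)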
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 5ea187c30..114aef2bd 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -4,7 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <optional>
8#include <span> 7#include <span>
9#include <utility> 8#include <utility>
10 9
@@ -27,31 +26,31 @@ class VKUpdateDescriptorQueue;
27class Image; 26class Image;
28struct StagingBufferRef; 27struct StagingBufferRef;
29 28
30class VKComputePass { 29class ComputePass {
31public: 30public:
32 explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, 31 explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
33 vk::Span<VkDescriptorSetLayoutBinding> bindings, 32 vk::Span<VkDescriptorSetLayoutBinding> bindings,
34 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 33 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
35 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); 34 const DescriptorBankInfo& bank_info,
36 ~VKComputePass(); 35 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
36 ~ComputePass();
37 37
38protected: 38protected:
39 VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); 39 const Device& device;
40
41 vk::DescriptorUpdateTemplateKHR descriptor_template; 40 vk::DescriptorUpdateTemplateKHR descriptor_template;
42 vk::PipelineLayout layout; 41 vk::PipelineLayout layout;
43 vk::Pipeline pipeline; 42 vk::Pipeline pipeline;
43 vk::DescriptorSetLayout descriptor_set_layout;
44 DescriptorAllocator descriptor_allocator;
44 45
45private: 46private:
46 vk::DescriptorSetLayout descriptor_set_layout;
47 std::optional<DescriptorAllocator> descriptor_allocator;
48 vk::ShaderModule module; 47 vk::ShaderModule module;
49}; 48};
50 49
51class Uint8Pass final : public VKComputePass { 50class Uint8Pass final : public ComputePass {
52public: 51public:
53 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, 52 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
54 VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, 53 DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
55 VKUpdateDescriptorQueue& update_descriptor_queue_); 54 VKUpdateDescriptorQueue& update_descriptor_queue_);
56 ~Uint8Pass(); 55 ~Uint8Pass();
57 56
@@ -66,10 +65,10 @@ private:
66 VKUpdateDescriptorQueue& update_descriptor_queue; 65 VKUpdateDescriptorQueue& update_descriptor_queue;
67}; 66};
68 67
69class QuadIndexedPass final : public VKComputePass { 68class QuadIndexedPass final : public ComputePass {
70public: 69public:
71 explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 70 explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
72 VKDescriptorPool& descriptor_pool_, 71 DescriptorPool& descriptor_pool_,
73 StagingBufferPool& staging_buffer_pool_, 72 StagingBufferPool& staging_buffer_pool_,
74 VKUpdateDescriptorQueue& update_descriptor_queue_); 73 VKUpdateDescriptorQueue& update_descriptor_queue_);
75 ~QuadIndexedPass(); 74 ~QuadIndexedPass();
@@ -84,10 +83,10 @@ private:
84 VKUpdateDescriptorQueue& update_descriptor_queue; 83 VKUpdateDescriptorQueue& update_descriptor_queue;
85}; 84};
86 85
87class ASTCDecoderPass final : public VKComputePass { 86class ASTCDecoderPass final : public ComputePass {
88public: 87public:
89 explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, 88 explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
90 VKDescriptorPool& descriptor_pool_, 89 DescriptorPool& descriptor_pool_,
91 StagingBufferPool& staging_buffer_pool_, 90 StagingBufferPool& staging_buffer_pool_,
92 VKUpdateDescriptorQueue& update_descriptor_queue_, 91 VKUpdateDescriptorQueue& update_descriptor_queue_,
93 MemoryAllocator& memory_allocator_); 92 MemoryAllocator& memory_allocator_);
@@ -99,7 +98,6 @@ public:
99private: 98private:
100 void MakeDataBuffer(); 99 void MakeDataBuffer();
101 100
102 const Device& device;
103 VKScheduler& scheduler; 101 VKScheduler& scheduler;
104 StagingBufferPool& staging_buffer_pool; 102 StagingBufferPool& staging_buffer_pool;
105 VKUpdateDescriptorQueue& update_descriptor_queue; 103 VKUpdateDescriptorQueue& update_descriptor_queue;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 3a48219b7..70b84c7a6 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -2,152 +2,198 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <vector> 6#include <vector>
6 7
8#include <boost/container/small_vector.hpp>
9
10#include "video_core/renderer_vulkan/pipeline_helper.h"
11#include "video_core/renderer_vulkan/vk_buffer_cache.h"
7#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 12#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 13#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 14#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h" 15#include "video_core/renderer_vulkan/vk_scheduler.h"
11#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
12#include "video_core/renderer_vulkan/vk_update_descriptor.h" 16#include "video_core/renderer_vulkan/vk_update_descriptor.h"
17#include "video_core/shader_notify.h"
13#include "video_core/vulkan_common/vulkan_device.h" 18#include "video_core/vulkan_common/vulkan_device.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
15 20
16namespace Vulkan { 21namespace Vulkan {
17 22
18VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, 23using Shader::ImageBufferDescriptor;
19 VKDescriptorPool& descriptor_pool_, 24using Tegra::Texture::TexturePair;
20 VKUpdateDescriptorQueue& update_descriptor_queue_, 25
21 const SPIRVShader& shader_) 26ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
22 : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, 27 VKUpdateDescriptorQueue& update_descriptor_queue_,
23 descriptor_set_layout{CreateDescriptorSetLayout()}, 28 Common::ThreadWorker* thread_worker,
24 descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, 29 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
25 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, 30 vk::ShaderModule spv_module_)
26 descriptor_template{CreateDescriptorUpdateTemplate()}, 31 : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
27 shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} 32 spv_module(std::move(spv_module_)) {
28 33 if (shader_notify) {
29VKComputePipeline::~VKComputePipeline() = default; 34 shader_notify->MarkShaderBuilding();
30
31VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
32 if (!descriptor_template) {
33 return {};
34 }
35 const VkDescriptorSet set = descriptor_allocator.Commit();
36 update_descriptor_queue.Send(*descriptor_template, set);
37 return set;
38}
39
40vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
41 std::vector<VkDescriptorSetLayoutBinding> bindings;
42 u32 binding = 0;
43 const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
44 // TODO(Rodrigo): Maybe make individual bindings here?
45 for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
46 bindings.push_back({
47 .binding = binding++,
48 .descriptorType = descriptor_type,
49 .descriptorCount = 1,
50 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
51 .pImmutableSamplers = nullptr,
52 });
53 }
54 };
55 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
56 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
57 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
58 add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
59 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
60 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
61
62 return device.GetLogical().CreateDescriptorSetLayout({
63 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
64 .pNext = nullptr,
65 .flags = 0,
66 .bindingCount = static_cast<u32>(bindings.size()),
67 .pBindings = bindings.data(),
68 });
69}
70
71vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
72 return device.GetLogical().CreatePipelineLayout({
73 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
74 .pNext = nullptr,
75 .flags = 0,
76 .setLayoutCount = 1,
77 .pSetLayouts = descriptor_set_layout.address(),
78 .pushConstantRangeCount = 0,
79 .pPushConstantRanges = nullptr,
80 });
81}
82
83vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
84 std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
85 u32 binding = 0;
86 u32 offset = 0;
87 FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
88 if (template_entries.empty()) {
89 // If the shader doesn't use descriptor sets, skip template creation.
90 return {};
91 } 35 }
92 36 std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
93 return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ 37 uniform_buffer_sizes.begin());
94 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, 38
95 .pNext = nullptr, 39 auto func{[this, &descriptor_pool, shader_notify] {
96 .flags = 0, 40 DescriptorLayoutBuilder builder{device};
97 .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), 41 builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
98 .pDescriptorUpdateEntries = template_entries.data(), 42
99 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, 43 descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
100 .descriptorSetLayout = *descriptor_set_layout, 44 pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
101 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 45 descriptor_update_template =
102 .pipelineLayout = *layout, 46 builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
103 .set = DESCRIPTOR_SET, 47 descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
104 }); 48 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
105} 49 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
106 50 .pNext = nullptr,
107vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { 51 .requiredSubgroupSize = GuestWarpSize,
108 device.SaveShader(code); 52 };
109 53 pipeline = device.GetLogical().CreateComputePipeline({
110 return device.GetLogical().CreateShaderModule({ 54 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
111 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 55 .pNext = nullptr,
112 .pNext = nullptr, 56 .flags = 0,
113 .flags = 0, 57 .stage{
114 .codeSize = code.size() * sizeof(u32),
115 .pCode = code.data(),
116 });
117}
118
119vk::Pipeline VKComputePipeline::CreatePipeline() const {
120
121 VkComputePipelineCreateInfo ci{
122 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
123 .pNext = nullptr,
124 .flags = 0,
125 .stage =
126 {
127 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 58 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
128 .pNext = nullptr, 59 .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
129 .flags = 0, 60 .flags = 0,
130 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 61 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
131 .module = *shader_module, 62 .module = *spv_module,
132 .pName = "main", 63 .pName = "main",
133 .pSpecializationInfo = nullptr, 64 .pSpecializationInfo = nullptr,
134 }, 65 },
135 .layout = *layout, 66 .layout = *pipeline_layout,
136 .basePipelineHandle = nullptr, 67 .basePipelineHandle = 0,
137 .basePipelineIndex = 0, 68 .basePipelineIndex = 0,
138 }; 69 });
139 70 std::lock_guard lock{build_mutex};
140 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ 71 is_built = true;
141 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, 72 build_condvar.notify_one();
142 .pNext = nullptr, 73 if (shader_notify) {
143 .requiredSubgroupSize = GuestWarpSize, 74 shader_notify->MarkShaderComplete();
144 }; 75 }
145 76 }};
146 if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { 77 if (thread_worker) {
147 ci.stage.pNext = &subgroup_size_ci; 78 thread_worker->QueueWork(std::move(func));
79 } else {
80 func();
81 }
82}
83
84void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
85 Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
86 BufferCache& buffer_cache, TextureCache& texture_cache) {
87 update_descriptor_queue.Acquire();
88
89 buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
90 buffer_cache.UnbindComputeStorageBuffers();
91 size_t ssbo_index{};
92 for (const auto& desc : info.storage_buffers_descriptors) {
93 ASSERT(desc.count == 1);
94 buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
95 desc.is_written);
96 ++ssbo_index;
148 } 97 }
149 98
150 return device.GetLogical().CreateComputePipeline(ci); 99 texture_cache.SynchronizeComputeDescriptors();
100
101 static constexpr size_t max_elements = 64;
102 std::array<ImageId, max_elements> image_view_ids;
103 boost::container::static_vector<u32, max_elements> image_view_indices;
104 boost::container::static_vector<VkSampler, max_elements> samplers;
105
106 const auto& qmd{kepler_compute.launch_description};
107 const auto& cbufs{qmd.const_buffer_config};
108 const bool via_header_index{qmd.linked_tsc != 0};
109 const auto read_handle{[&](const auto& desc, u32 index) {
110 ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
111 const u32 index_offset{index << desc.size_shift};
112 const u32 offset{desc.cbuf_offset + index_offset};
113 const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
114 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
115 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
116 if (desc.has_secondary) {
117 ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
118 const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
119 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
120 secondary_offset};
121 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
122 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
123 return TexturePair(lhs_raw | rhs_raw, via_header_index);
124 }
125 }
126 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
127 }};
128 const auto add_image{[&](const auto& desc) {
129 for (u32 index = 0; index < desc.count; ++index) {
130 const auto handle{read_handle(desc, index)};
131 image_view_indices.push_back(handle.first);
132 }
133 }};
134 std::ranges::for_each(info.texture_buffer_descriptors, add_image);
135 std::ranges::for_each(info.image_buffer_descriptors, add_image);
136 for (const auto& desc : info.texture_descriptors) {
137 for (u32 index = 0; index < desc.count; ++index) {
138 const auto handle{read_handle(desc, index)};
139 image_view_indices.push_back(handle.first);
140
141 Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
142 samplers.push_back(sampler->Handle());
143 }
144 }
145 std::ranges::for_each(info.image_descriptors, add_image);
146
147 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
148 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
149
150 buffer_cache.UnbindComputeTextureBuffers();
151 ImageId* texture_buffer_ids{image_view_ids.data()};
152 size_t index{};
153 const auto add_buffer{[&](const auto& desc) {
154 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
155 for (u32 i = 0; i < desc.count; ++i) {
156 bool is_written{false};
157 if constexpr (is_image) {
158 is_written = desc.is_written;
159 }
160 ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
161 buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
162 image_view.BufferSize(), image_view.format,
163 is_written, is_image);
164 ++texture_buffer_ids;
165 ++index;
166 }
167 }};
168 std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
169 std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
170
171 buffer_cache.UpdateComputeBuffers();
172 buffer_cache.BindHostComputeBuffers();
173
174 const VkSampler* samplers_it{samplers.data()};
175 const ImageId* views_it{image_view_ids.data()};
176 PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue);
177
178 if (!is_built.load(std::memory_order::relaxed)) {
179 // Wait for the pipeline to be built
180 scheduler.Record([this](vk::CommandBuffer) {
181 std::unique_lock lock{build_mutex};
182 build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
183 });
184 }
185 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
186 scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
187 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
188 if (!descriptor_set_layout) {
189 return;
190 }
191 const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
192 const vk::Device& dev{device.GetLogical()};
193 dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
194 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
195 descriptor_set, nullptr);
196 });
151} 197}
152 198
153} // namespace Vulkan 199} // namespace Vulkan
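For context on the Configure() path above: each u32 read from the constant buffer is a packed texture handle that TexturePair() splits into an image (TIC) index and a sampler (TSC) index, unless the QMD requests lookup via the header index, in which case the raw word is used for both. A minimal standalone sketch of that decode, assuming the conventional 20-bit/12-bit split used by Maxwell-class GPUs (DecodeHandle is a hypothetical name; the real helper is Tegra::Texture::TexturePair):

    #include <cstdint>
    #include <utility>

    // Hypothetical illustration only; the bit widths are an assumption, not taken from this diff.
    std::pair<std::uint32_t, std::uint32_t> DecodeHandle(std::uint32_t raw, bool via_header_index) {
        if (via_header_index) {
            return {raw, raw}; // sampler index is resolved through the texture header itself
        }
        const std::uint32_t image = raw & ((1u << 20) - 1);  // low bits: TIC (image view) index
        const std::uint32_t sampler = (raw >> 20) & 0xfffu;  // high bits: TSC (sampler) index
        return {image, sampler};
    }

The has_secondary branch above ORs two such words together before decoding, which is how a texture handle and a sampler handle stored in separate constant buffers are combined.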
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 7e16575ac..52fec04d3 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -4,61 +4,63 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
8#include <condition_variable>
9#include <mutex>
10
7#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/thread_worker.h"
13#include "shader_recompiler/shader_info.h"
14#include "video_core/memory_manager.h"
15#include "video_core/renderer_vulkan/vk_buffer_cache.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 16#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 17#include "video_core/renderer_vulkan/vk_texture_cache.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
11 20
21namespace VideoCore {
22class ShaderNotify;
23}
24
12namespace Vulkan { 25namespace Vulkan {
13 26
14class Device; 27class Device;
15class VKScheduler; 28class VKScheduler;
16class VKUpdateDescriptorQueue;
17 29
18class VKComputePipeline final { 30class ComputePipeline {
19public: 31public:
20 explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, 32 explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
21 VKDescriptorPool& descriptor_pool_, 33 VKUpdateDescriptorQueue& update_descriptor_queue,
22 VKUpdateDescriptorQueue& update_descriptor_queue_, 34 Common::ThreadWorker* thread_worker,
23 const SPIRVShader& shader_); 35 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
24 ~VKComputePipeline(); 36 vk::ShaderModule spv_module);
25
26 VkDescriptorSet CommitDescriptorSet();
27 37
28 VkPipeline GetHandle() const { 38 ComputePipeline& operator=(ComputePipeline&&) noexcept = delete;
29 return *pipeline; 39 ComputePipeline(ComputePipeline&&) noexcept = delete;
30 }
31 40
32 VkPipelineLayout GetLayout() const { 41 ComputePipeline& operator=(const ComputePipeline&) = delete;
33 return *layout; 42 ComputePipeline(const ComputePipeline&) = delete;
34 }
35 43
36 const ShaderEntries& GetEntries() const { 44 void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
37 return entries; 45 VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
38 }
39 46
40private: 47private:
41 vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
42
43 vk::PipelineLayout CreatePipelineLayout() const;
44
45 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
46
47 vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
48
49 vk::Pipeline CreatePipeline() const;
50
51 const Device& device; 48 const Device& device;
52 VKScheduler& scheduler; 49 VKUpdateDescriptorQueue& update_descriptor_queue;
53 ShaderEntries entries; 50 Shader::Info info;
54 51
52 VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
53
54 vk::ShaderModule spv_module;
55 vk::DescriptorSetLayout descriptor_set_layout; 55 vk::DescriptorSetLayout descriptor_set_layout;
56 DescriptorAllocator descriptor_allocator; 56 DescriptorAllocator descriptor_allocator;
57 VKUpdateDescriptorQueue& update_descriptor_queue; 57 vk::PipelineLayout pipeline_layout;
58 vk::PipelineLayout layout; 58 vk::DescriptorUpdateTemplateKHR descriptor_update_template;
59 vk::DescriptorUpdateTemplateKHR descriptor_template;
60 vk::ShaderModule shader_module;
61 vk::Pipeline pipeline; 59 vk::Pipeline pipeline;
60
61 std::condition_variable build_condvar;
62 std::mutex build_mutex;
63 std::atomic_bool is_built{false};
62}; 64};
63 65
64} // namespace Vulkan 66} // namespace Vulkan
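The new members at the bottom of the class show why Configure() can run before the VkPipeline exists: construction may be queued on a Common::ThreadWorker, and the command recorded into the scheduler only blocks when the build has not finished yet. A minimal sketch of that handshake using the same member names (the worker and scheduler plumbing are simplified away):

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    struct AsyncBuiltPipeline {
        std::condition_variable build_condvar;
        std::mutex build_mutex;
        std::atomic_bool is_built{false};

        // Runs on the shader-building worker thread once the VkPipeline is ready.
        void MarkBuilt() {
            std::lock_guard lock{build_mutex};
            is_built = true;
            build_condvar.notify_one();
        }

        // What the lambda recorded into the scheduler effectively does.
        void WaitUntilBuilt() {
            if (is_built.load(std::memory_order::relaxed)) {
                return; // fast path: already built, no locking needed
            }
            std::unique_lock lock{build_mutex};
            build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
        }
    };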
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index ef9fb5910..8e77e4796 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex>
6#include <span>
5#include <vector> 7#include <vector>
6 8
7#include "common/common_types.h" 9#include "common/common_types.h"
@@ -13,79 +15,149 @@
13 15
14namespace Vulkan { 16namespace Vulkan {
15 17
16// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. 18// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
17constexpr std::size_t SETS_GROW_RATE = 0x20; 19constexpr size_t SETS_GROW_RATE = 16;
20constexpr s32 SCORE_THRESHOLD = 3;
21constexpr u32 SETS_PER_POOL = 64;
18 22
19DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, 23struct DescriptorBank {
20 VkDescriptorSetLayout layout_) 24 DescriptorBankInfo info;
21 : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), 25 std::vector<vk::DescriptorPool> pools;
22 descriptor_pool{descriptor_pool_}, layout{layout_} {} 26};
23 27
24DescriptorAllocator::~DescriptorAllocator() = default; 28bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
29 return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
30 texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
 31        textures >= subset.textures && images >= subset.images;
32}
25 33
26VkDescriptorSet DescriptorAllocator::Commit() { 34template <typename Descriptors>
27 const std::size_t index = CommitResource(); 35static u32 Accumulate(const Descriptors& descriptors) {
28 return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; 36 u32 count = 0;
37 for (const auto& descriptor : descriptors) {
38 count += descriptor.count;
39 }
40 return count;
29} 41}
30 42
31void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { 43static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
32 descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); 44 DescriptorBankInfo bank;
45 for (const Shader::Info& info : infos) {
46 bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
47 bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
48 bank.texture_buffers += Accumulate(info.texture_buffer_descriptors);
49 bank.image_buffers += Accumulate(info.image_buffer_descriptors);
50 bank.textures += Accumulate(info.texture_descriptors);
51 bank.images += Accumulate(info.image_descriptors);
52 }
53 bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
54 bank.image_buffers + bank.textures + bank.images;
55 return bank;
33} 56}
34 57
35VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) 58static void AllocatePool(const Device& device, DescriptorBank& bank) {
36 : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ 59 std::array<VkDescriptorPoolSize, 6> pool_sizes;
37 AllocateNewPool()} {} 60 size_t pool_cursor{};
38 61 const auto add = [&](VkDescriptorType type, u32 count) {
39VKDescriptorPool::~VKDescriptorPool() = default; 62 if (count > 0) {
40 63 pool_sizes[pool_cursor++] = {
41vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { 64 .type = type,
42 static constexpr u32 num_sets = 0x20000; 65 .descriptorCount = count * SETS_PER_POOL,
43 static constexpr VkDescriptorPoolSize pool_sizes[] = { 66 };
44 {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, 67 }
45 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
46 {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
47 {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
48 {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
49 {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
50 }; 68 };
51 69 const auto& info{bank.info};
52 const VkDescriptorPoolCreateInfo ci{ 70 add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers);
71 add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers);
72 add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers);
73 add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers);
74 add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures);
75 add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images);
76 bank.pools.push_back(device.GetLogical().CreateDescriptorPool({
53 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, 77 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
54 .pNext = nullptr, 78 .pNext = nullptr,
55 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 79 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
56 .maxSets = num_sets, 80 .maxSets = SETS_PER_POOL,
57 .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), 81 .poolSizeCount = static_cast<u32>(pool_cursor),
58 .pPoolSizes = std::data(pool_sizes), 82 .pPoolSizes = std::data(pool_sizes),
59 }; 83 }));
60 return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); 84}
85
86DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
87 DescriptorBank& bank_, VkDescriptorSetLayout layout_)
88 : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_},
89 layout{layout_} {}
90
91VkDescriptorSet DescriptorAllocator::Commit() {
92 const size_t index = CommitResource();
93 return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
61} 94}
62 95
63vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, 96void DescriptorAllocator::Allocate(size_t begin, size_t end) {
64 std::size_t count) { 97 sets.push_back(AllocateDescriptors(end - begin));
65 const std::vector layout_copies(count, layout); 98}
66 VkDescriptorSetAllocateInfo ai{ 99
100vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
101 const std::vector<VkDescriptorSetLayout> layouts(count, layout);
102 VkDescriptorSetAllocateInfo allocate_info{
67 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 103 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
68 .pNext = nullptr, 104 .pNext = nullptr,
69 .descriptorPool = **active_pool, 105 .descriptorPool = *bank->pools.back(),
70 .descriptorSetCount = static_cast<u32>(count), 106 .descriptorSetCount = static_cast<u32>(count),
71 .pSetLayouts = layout_copies.data(), 107 .pSetLayouts = layouts.data(),
72 }; 108 };
73 109 vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
74 vk::DescriptorSets sets = active_pool->Allocate(ai); 110 if (!new_sets.IsOutOfPoolMemory()) {
75 if (!sets.IsOutOfPoolMemory()) { 111 return new_sets;
76 return sets;
77 } 112 }
78
79 // Our current pool is out of memory. Allocate a new one and retry 113 // Our current pool is out of memory. Allocate a new one and retry
80 active_pool = AllocateNewPool(); 114 AllocatePool(*device, *bank);
81 ai.descriptorPool = **active_pool; 115 allocate_info.descriptorPool = *bank->pools.back();
82 sets = active_pool->Allocate(ai); 116 new_sets = bank->pools.back().Allocate(allocate_info);
83 if (!sets.IsOutOfPoolMemory()) { 117 if (!new_sets.IsOutOfPoolMemory()) {
84 return sets; 118 return new_sets;
85 } 119 }
86
87 // After allocating a new pool, we are out of memory again. We can't handle this from here. 120 // After allocating a new pool, we are out of memory again. We can't handle this from here.
88 throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); 121 throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
89} 122}
90 123
124DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler)
125 : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
126
127DescriptorPool::~DescriptorPool() = default;
128
129DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
130 std::span<const Shader::Info> infos) {
131 return Allocator(layout, MakeBankInfo(infos));
132}
133
134DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
135 const Shader::Info& info) {
136 return Allocator(layout, MakeBankInfo(std::array{info}));
137}
138
139DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
140 const DescriptorBankInfo& info) {
141 return DescriptorAllocator(device, master_semaphore, Bank(info), layout);
142}
143
144DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
145 std::shared_lock read_lock{banks_mutex};
146 const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
147 return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
148 });
149 if (it != bank_infos.end()) {
150 return *banks[std::distance(bank_infos.begin(), it)].get();
151 }
152 read_lock.unlock();
153
154 std::unique_lock write_lock{banks_mutex};
155 bank_infos.push_back(reqs);
156
157 auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
158 bank.info = reqs;
159 AllocatePool(device, bank);
160 return bank;
161}
162
91} // namespace Vulkan 163} // namespace Vulkan
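To make the bank selection policy in DescriptorPool::Bank() concrete: a request reuses the first existing bank whose per-type counts all cover the request (IsSuperset) and whose total descriptor count is within SCORE_THRESHOLD of the request's, so small pipelines are not parked in banks sized for much larger ones. A standalone restatement of that predicate (BankInfo and Matches are illustrative names, not part of the diff):

    #include <cmath>
    #include <cstdint>

    struct BankInfo {
        std::uint32_t uniform_buffers{}, storage_buffers{}, texture_buffers{};
        std::uint32_t image_buffers{}, textures{}, images{};
        std::int32_t score{}; // sum of all six counts
    };

    constexpr std::int32_t SCORE_THRESHOLD = 3;

    bool Matches(const BankInfo& bank, const BankInfo& request) {
        const bool superset = bank.uniform_buffers >= request.uniform_buffers &&
                              bank.storage_buffers >= request.storage_buffers &&
                              bank.texture_buffers >= request.texture_buffers &&
                              bank.image_buffers >= request.image_buffers &&
                              bank.textures >= request.textures &&
                              bank.images >= request.images;
        return superset && std::abs(bank.score - request.score) < SCORE_THRESHOLD;
    }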
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index f892be7be..59466aac5 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -4,57 +4,85 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <shared_mutex>
8#include <span>
7#include <vector> 9#include <vector>
8 10
11#include "shader_recompiler/shader_info.h"
9#include "video_core/renderer_vulkan/vk_resource_pool.h" 12#include "video_core/renderer_vulkan/vk_resource_pool.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
11 14
12namespace Vulkan { 15namespace Vulkan {
13 16
14class Device; 17class Device;
15class VKDescriptorPool;
16class VKScheduler; 18class VKScheduler;
17 19
20struct DescriptorBank;
21
22struct DescriptorBankInfo {
23 [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept;
24
25 u32 uniform_buffers{}; ///< Number of uniform buffer descriptors
26 u32 storage_buffers{}; ///< Number of storage buffer descriptors
27 u32 texture_buffers{}; ///< Number of texture buffer descriptors
28 u32 image_buffers{}; ///< Number of image buffer descriptors
29 u32 textures{}; ///< Number of texture descriptors
30 u32 images{}; ///< Number of image descriptors
31 s32 score{}; ///< Number of descriptors in total
32};
33
18class DescriptorAllocator final : public ResourcePool { 34class DescriptorAllocator final : public ResourcePool {
35 friend class DescriptorPool;
36
19public: 37public:
20 explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); 38 explicit DescriptorAllocator() = default;
21 ~DescriptorAllocator() override; 39 ~DescriptorAllocator() override = default;
40
41 DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
42 DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
22 43
23 DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; 44 DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
24 DescriptorAllocator(const DescriptorAllocator&) = delete; 45 DescriptorAllocator(const DescriptorAllocator&) = delete;
25 46
26 VkDescriptorSet Commit(); 47 VkDescriptorSet Commit();
27 48
28protected:
29 void Allocate(std::size_t begin, std::size_t end) override;
30
31private: 49private:
32 VKDescriptorPool& descriptor_pool; 50 explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
33 const VkDescriptorSetLayout layout; 51 DescriptorBank& bank_, VkDescriptorSetLayout layout_);
34 52
35 std::vector<vk::DescriptorSets> descriptors_allocations; 53 void Allocate(size_t begin, size_t end) override;
36}; 54
55 vk::DescriptorSets AllocateDescriptors(size_t count);
56
57 const Device* device{};
58 DescriptorBank* bank{};
59 VkDescriptorSetLayout layout{};
37 60
38class VKDescriptorPool final { 61 std::vector<vk::DescriptorSets> sets;
39 friend DescriptorAllocator; 62};
40 63
64class DescriptorPool {
41public: 65public:
42 explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); 66 explicit DescriptorPool(const Device& device, VKScheduler& scheduler);
43 ~VKDescriptorPool(); 67 ~DescriptorPool();
44 68
45 VKDescriptorPool(const VKDescriptorPool&) = delete; 69 DescriptorPool& operator=(const DescriptorPool&) = delete;
46 VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; 70 DescriptorPool(const DescriptorPool&) = delete;
47 71
48private: 72 DescriptorAllocator Allocator(VkDescriptorSetLayout layout,
49 vk::DescriptorPool* AllocateNewPool(); 73 std::span<const Shader::Info> infos);
74 DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info);
75 DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info);
50 76
51 vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); 77private:
78 DescriptorBank& Bank(const DescriptorBankInfo& reqs);
52 79
53 const Device& device; 80 const Device& device;
54 MasterSemaphore& master_semaphore; 81 MasterSemaphore& master_semaphore;
55 82
56 std::vector<vk::DescriptorPool> pools; 83 std::shared_mutex banks_mutex;
57 vk::DescriptorPool* active_pool; 84 std::vector<DescriptorBankInfo> bank_infos;
85 std::vector<std::unique_ptr<DescriptorBank>> banks;
58}; 86};
59 87
60} // namespace Vulkan \ No newline at end of file 88} // namespace Vulkan \ No newline at end of file
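One detail worth spelling out from the allocator above: descriptor sets are handed out in chunks of SETS_GROW_RATE, and Commit() maps the flat index returned by the resource pool onto a chunk plus an offset within it. A toy model of that growth and indexing (ints stand in for VkDescriptorSet; all names here are illustrative):

    #include <cstddef>
    #include <vector>

    constexpr std::size_t SETS_GROW_RATE = 16;

    class ToySetPool {
    public:
        int Commit() {
            if (next_index == capacity) {
                // Equivalent of Allocate(begin, end): grow by one chunk of SETS_GROW_RATE sets.
                chunks.emplace_back(SETS_GROW_RATE, 0);
                capacity += SETS_GROW_RATE;
            }
            const std::size_t index = next_index++;
            return chunks[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
        }

    private:
        std::vector<std::vector<int>> chunks;
        std::size_t next_index{};
        std::size_t capacity{};
    };

The real DescriptorAllocator additionally inherits the master-semaphore tick tracking from ResourcePool, so committed sets are only recycled once the GPU has finished the work that used them.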
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index fc6dd83eb..18482e1d0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -1,29 +1,58 @@
1// Copyright 2019 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <span>
7#include <cstring>
8#include <vector>
9 7
10#include "common/common_types.h" 8#include <boost/container/small_vector.hpp>
11#include "common/microprofile.h" 9#include <boost/container/static_vector.hpp>
12#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 10
11#include "common/bit_field.h"
13#include "video_core/renderer_vulkan/maxwell_to_vk.h" 12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 13#include "video_core/renderer_vulkan/pipeline_helper.h"
14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
15#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 15#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
16#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 16#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h" 17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_texture_cache.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h" 19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
20#include "video_core/shader_notify.h"
19#include "video_core/vulkan_common/vulkan_device.h" 21#include "video_core/vulkan_common/vulkan_device.h"
20#include "video_core/vulkan_common/vulkan_wrapper.h"
21
22namespace Vulkan {
23 22
24MICROPROFILE_DECLARE(Vulkan_PipelineCache); 23#if defined(_MSC_VER) && defined(NDEBUG)
24#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
25#else
26#define LAMBDA_FORCEINLINE
27#endif
25 28
29namespace Vulkan {
26namespace { 30namespace {
31using boost::container::small_vector;
32using boost::container::static_vector;
33using Shader::ImageBufferDescriptor;
34using Tegra::Texture::TexturePair;
35using VideoCore::Surface::PixelFormat;
36using VideoCore::Surface::PixelFormatFromDepthFormat;
37using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
38
39constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
40constexpr size_t MAX_IMAGE_ELEMENTS = 64;
41
42DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
43 DescriptorLayoutBuilder builder{device};
44 for (size_t index = 0; index < infos.size(); ++index) {
45 static constexpr std::array stages{
46 VK_SHADER_STAGE_VERTEX_BIT,
47 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
48 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
49 VK_SHADER_STAGE_GEOMETRY_BIT,
50 VK_SHADER_STAGE_FRAGMENT_BIT,
51 };
52 builder.Add(infos[index], stages.at(index));
53 }
54 return builder;
55}
27 56
28template <class StencilFace> 57template <class StencilFace>
29VkStencilOpState GetStencilFaceState(const StencilFace& face) { 58VkStencilOpState GetStencilFaceState(const StencilFace& face) {
@@ -39,15 +68,24 @@ VkStencilOpState GetStencilFaceState(const StencilFace& face) {
39} 68}
40 69
41bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { 70bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
42 static constexpr std::array unsupported_topologies = { 71 static constexpr std::array unsupported_topologies{
43 VK_PRIMITIVE_TOPOLOGY_POINT_LIST, 72 VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
44 VK_PRIMITIVE_TOPOLOGY_LINE_LIST, 73 VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
45 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 74 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
46 VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, 75 VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
47 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, 76 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
48 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; 77 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
49 return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), 78 // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT,
50 topology) == std::end(unsupported_topologies); 79 };
80 return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end();
81}
82
83bool IsLine(VkPrimitiveTopology topology) {
84 static constexpr std::array line_topologies{
85 VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
86 // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT,
87 };
 88    return std::ranges::find(line_topologies, topology) != line_topologies.end();
51} 89}
52 90
53VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { 91VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
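Both helpers above reduce to membership tests on small constexpr arrays, but with opposite senses: SupportsPrimitiveRestart() returns true when the topology is absent from its blocklist (find == end), while IsLine() returns true when the topology is one of the listed line topologies (find != end). A tiny self-contained sketch of the two idioms (the enum and names are illustrative only):

    #include <algorithm>
    #include <array>

    enum class Topo { PointList, LineList, LineStrip, TriangleList, PatchList };

    constexpr std::array blocklist{Topo::PointList, Topo::PatchList};
    constexpr std::array lines{Topo::LineList, Topo::LineStrip};

    bool NotInBlocklist(Topo t) { // "supports X": true when t is NOT listed
        return std::ranges::find(blocklist, t) == blocklist.end();
    }

    bool IsLineTopo(Topo t) {     // "is a line": true when t IS listed
        return std::ranges::find(lines, t) != lines.end();
    }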
@@ -59,8 +97,7 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
59 BitField<12, 3, Maxwell::ViewportSwizzle> w; 97 BitField<12, 3, Maxwell::ViewportSwizzle> w;
60 }; 98 };
61 const Swizzle unpacked{swizzle}; 99 const Swizzle unpacked{swizzle};
62 100 return VkViewportSwizzleNV{
63 return {
64 .x = MaxwellToVK::ViewportSwizzle(unpacked.x), 101 .x = MaxwellToVK::ViewportSwizzle(unpacked.x),
65 .y = MaxwellToVK::ViewportSwizzle(unpacked.y), 102 .y = MaxwellToVK::ViewportSwizzle(unpacked.y),
66 .z = MaxwellToVK::ViewportSwizzle(unpacked.z), 103 .z = MaxwellToVK::ViewportSwizzle(unpacked.z),
@@ -68,193 +105,446 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
68 }; 105 };
69} 106}
70 107
71VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { 108PixelFormat DecodeFormat(u8 encoded_format) {
72 switch (msaa_mode) { 109 const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)};
73 case Tegra::Texture::MsaaMode::Msaa1x1: 110 if (format == Tegra::RenderTargetFormat::NONE) {
74 return VK_SAMPLE_COUNT_1_BIT; 111 return PixelFormat::Invalid;
75 case Tegra::Texture::MsaaMode::Msaa2x1:
76 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
77 return VK_SAMPLE_COUNT_2_BIT;
78 case Tegra::Texture::MsaaMode::Msaa2x2:
79 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
80 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
81 return VK_SAMPLE_COUNT_4_BIT;
82 case Tegra::Texture::MsaaMode::Msaa4x2:
83 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
84 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
85 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
86 return VK_SAMPLE_COUNT_8_BIT;
87 case Tegra::Texture::MsaaMode::Msaa4x4:
88 return VK_SAMPLE_COUNT_16_BIT;
89 default:
90 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
91 return VK_SAMPLE_COUNT_1_BIT;
92 } 112 }
113 return PixelFormatFromRenderTargetFormat(format);
93} 114}
94 115
95} // Anonymous namespace 116RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
117 RenderPassKey key;
118 std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat);
119 if (state.depth_enabled != 0) {
120 const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())};
121 key.depth_format = PixelFormatFromDepthFormat(depth_format);
122 } else {
123 key.depth_format = PixelFormat::Invalid;
124 }
125 key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);
126 return key;
127}
96 128
97VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, 129size_t NumAttachments(const FixedPipelineState& state) {
98 VKDescriptorPool& descriptor_pool_, 130 size_t num{};
99 VKUpdateDescriptorQueue& update_descriptor_queue_, 131 for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
100 const GraphicsPipelineCacheKey& key, 132 const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])};
101 vk::Span<VkDescriptorSetLayoutBinding> bindings, 133 if (format != Tegra::RenderTargetFormat::NONE) {
102 const SPIRVProgram& program, u32 num_color_buffers) 134 num = index + 1;
103 : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, 135 }
104 descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, 136 }
105 descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, 137 return num;
106 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
107 descriptor_template{CreateDescriptorUpdateTemplate(program)},
108 modules(CreateShaderModules(program)),
109 pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
110
111VKGraphicsPipeline::~VKGraphicsPipeline() = default;
112
113VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
114 if (!descriptor_template) {
115 return {};
116 }
117 const VkDescriptorSet set = descriptor_allocator.Commit();
118 update_descriptor_queue.Send(*descriptor_template, set);
119 return set;
120} 138}
121 139
122vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( 140template <typename Spec>
123 vk::Span<VkDescriptorSetLayoutBinding> bindings) const { 141bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
124 const VkDescriptorSetLayoutCreateInfo ci{ 142 const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
125 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 143 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
126 .pNext = nullptr, 144 if (!Spec::enabled_stages[stage] && modules[stage]) {
127 .flags = 0, 145 return false;
128 .bindingCount = bindings.size(), 146 }
129 .pBindings = bindings.data(), 147 const auto& info{stage_infos[stage]};
130 }; 148 if constexpr (!Spec::has_storage_buffers) {
131 return device.GetLogical().CreateDescriptorSetLayout(ci); 149 if (!info.storage_buffers_descriptors.empty()) {
150 return false;
151 }
152 }
153 if constexpr (!Spec::has_texture_buffers) {
154 if (!info.texture_buffer_descriptors.empty()) {
155 return false;
156 }
157 }
158 if constexpr (!Spec::has_image_buffers) {
159 if (!info.image_buffer_descriptors.empty()) {
160 return false;
161 }
162 }
163 if constexpr (!Spec::has_images) {
164 if (!info.image_descriptors.empty()) {
165 return false;
166 }
167 }
168 }
169 return true;
132} 170}
133 171
134vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { 172using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
135 const VkPipelineLayoutCreateInfo ci{ 173
136 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 174template <typename Spec, typename... Specs>
137 .pNext = nullptr, 175ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
138 .flags = 0, 176 const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
139 .setLayoutCount = 1, 177 if constexpr (sizeof...(Specs) > 0) {
140 .pSetLayouts = descriptor_set_layout.address(), 178 if (!Passes<Spec>(modules, stage_infos)) {
141 .pushConstantRangeCount = 0, 179 return FindSpec<Specs...>(modules, stage_infos);
142 .pPushConstantRanges = nullptr, 180 }
143 }; 181 }
144 return device.GetLogical().CreatePipelineLayout(ci); 182 return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
145} 183}
146 184
147vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( 185struct SimpleVertexFragmentSpec {
148 const SPIRVProgram& program) const { 186 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
149 std::vector<VkDescriptorUpdateTemplateEntry> template_entries; 187 static constexpr bool has_storage_buffers = false;
150 u32 binding = 0; 188 static constexpr bool has_texture_buffers = false;
151 u32 offset = 0; 189 static constexpr bool has_image_buffers = false;
152 for (const auto& stage : program) { 190 static constexpr bool has_images = false;
153 if (stage) { 191};
154 FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); 192
193struct SimpleVertexSpec {
194 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
195 static constexpr bool has_storage_buffers = false;
196 static constexpr bool has_texture_buffers = false;
197 static constexpr bool has_image_buffers = false;
198 static constexpr bool has_images = false;
199};
200
201struct DefaultSpec {
202 static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
203 static constexpr bool has_storage_buffers = true;
204 static constexpr bool has_texture_buffers = true;
205 static constexpr bool has_image_buffers = true;
206 static constexpr bool has_images = true;
207};
208
209ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
210 const std::array<Shader::Info, NUM_STAGES>& infos) {
211 return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos);
212}
213} // Anonymous namespace
214
215GraphicsPipeline::GraphicsPipeline(
216 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
217 VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
218 VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
219 VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
220 RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_,
221 std::array<vk::ShaderModule, NUM_STAGES> stages,
222 const std::array<const Shader::Info*, NUM_STAGES>& infos)
223 : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
224 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
225 update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
226 if (shader_notify) {
227 shader_notify->MarkShaderBuilding();
228 }
229 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
230 const Shader::Info* const info{infos[stage]};
231 if (!info) {
232 continue;
155 } 233 }
234 stage_infos[stage] = *info;
235 enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
236 std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
156 } 237 }
157 if (template_entries.empty()) { 238 auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] {
158 // If the shader doesn't use descriptor sets, skip template creation. 239 DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
159 return {}; 240 uses_push_descriptor = builder.CanUsePushDescriptor();
241 descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
242 if (!uses_push_descriptor) {
243 descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
244 }
245 const VkDescriptorSetLayout set_layout{*descriptor_set_layout};
246 pipeline_layout = builder.CreatePipelineLayout(set_layout);
247 descriptor_update_template =
248 builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor);
249
250 const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
251 Validate();
252 MakePipeline(render_pass);
253
254 std::lock_guard lock{build_mutex};
255 is_built = true;
256 build_condvar.notify_one();
257 if (shader_notify) {
258 shader_notify->MarkShaderComplete();
259 }
260 }};
261 if (worker_thread) {
262 worker_thread->QueueWork(std::move(func));
263 } else {
264 func();
160 } 265 }
266 configure_func = ConfigureFunc(spv_modules, stage_infos);
267}
161 268
162 const VkDescriptorUpdateTemplateCreateInfoKHR ci{ 269void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
163 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, 270 transition_keys.push_back(transition->key);
164 .pNext = nullptr, 271 transitions.push_back(transition);
165 .flags = 0,
166 .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
167 .pDescriptorUpdateEntries = template_entries.data(),
168 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
169 .descriptorSetLayout = *descriptor_set_layout,
170 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
171 .pipelineLayout = *layout,
172 .set = DESCRIPTOR_SET,
173 };
174 return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
175} 272}
176 273
177std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( 274template <typename Spec>
178 const SPIRVProgram& program) const { 275void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
179 VkShaderModuleCreateInfo ci{ 276 std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
180 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 277 std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
181 .pNext = nullptr, 278 std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
182 .flags = 0, 279 size_t sampler_index{};
183 .codeSize = 0, 280 size_t image_index{};
184 .pCode = nullptr, 281
185 }; 282 texture_cache.SynchronizeGraphicsDescriptors();
283
284 buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
285
286 const auto& regs{maxwell3d.regs};
287 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
288 const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
289 const Shader::Info& info{stage_infos[stage]};
290 buffer_cache.UnbindGraphicsStorageBuffers(stage);
291 if constexpr (Spec::has_storage_buffers) {
292 size_t ssbo_index{};
293 for (const auto& desc : info.storage_buffers_descriptors) {
294 ASSERT(desc.count == 1);
295 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
296 desc.cbuf_offset, desc.is_written);
297 ++ssbo_index;
298 }
299 }
300 const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
301 const auto read_handle{[&](const auto& desc, u32 index) {
302 ASSERT(cbufs[desc.cbuf_index].enabled);
303 const u32 index_offset{index << desc.size_shift};
304 const u32 offset{desc.cbuf_offset + index_offset};
305 const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
306 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
307 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
308 if (desc.has_secondary) {
309 ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
310 const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
311 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
312 second_offset};
313 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
314 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
315 const u32 raw{lhs_raw | rhs_raw};
316 return TexturePair(raw, via_header_index);
317 }
318 }
319 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
320 }};
321 const auto add_image{[&](const auto& desc) {
322 for (u32 index = 0; index < desc.count; ++index) {
323 const auto handle{read_handle(desc, index)};
324 image_view_indices[image_index++] = handle.first;
325 }
326 }};
327 if constexpr (Spec::has_texture_buffers) {
328 for (const auto& desc : info.texture_buffer_descriptors) {
329 add_image(desc);
330 }
331 }
332 if constexpr (Spec::has_image_buffers) {
333 for (const auto& desc : info.image_buffer_descriptors) {
334 add_image(desc);
335 }
336 }
337 for (const auto& desc : info.texture_descriptors) {
338 for (u32 index = 0; index < desc.count; ++index) {
339 const auto handle{read_handle(desc, index)};
340 image_view_indices[image_index++] = handle.first;
186 341
187 std::vector<vk::ShaderModule> shader_modules; 342 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
188 shader_modules.reserve(Maxwell::MaxShaderStage); 343 samplers[sampler_index++] = sampler->Handle();
189 for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { 344 }
190 const auto& stage = program[i]; 345 }
191 if (!stage) { 346 if constexpr (Spec::has_images) {
192 continue; 347 for (const auto& desc : info.image_descriptors) {
348 add_image(desc);
349 }
193 } 350 }
351 }};
352 if constexpr (Spec::enabled_stages[0]) {
353 config_stage(0);
354 }
355 if constexpr (Spec::enabled_stages[1]) {
356 config_stage(1);
357 }
358 if constexpr (Spec::enabled_stages[2]) {
359 config_stage(2);
360 }
361 if constexpr (Spec::enabled_stages[3]) {
362 config_stage(3);
363 }
364 if constexpr (Spec::enabled_stages[4]) {
365 config_stage(4);
366 }
367 const std::span indices_span(image_view_indices.data(), image_index);
368 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
369
370 ImageId* texture_buffer_index{image_view_ids.data()};
371 const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
372 size_t index{};
373 const auto add_buffer{[&](const auto& desc) {
374 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
375 for (u32 i = 0; i < desc.count; ++i) {
376 bool is_written{false};
377 if constexpr (is_image) {
378 is_written = desc.is_written;
379 }
380 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
381 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
382 image_view.BufferSize(), image_view.format,
383 is_written, is_image);
384 ++index;
385 ++texture_buffer_index;
386 }
387 }};
388 buffer_cache.UnbindGraphicsTextureBuffers(stage);
194 389
195 device.SaveShader(stage->code); 390 const Shader::Info& info{stage_infos[stage]};
391 if constexpr (Spec::has_texture_buffers) {
392 for (const auto& desc : info.texture_buffer_descriptors) {
393 add_buffer(desc);
394 }
395 }
396 if constexpr (Spec::has_image_buffers) {
397 for (const auto& desc : info.image_buffer_descriptors) {
398 add_buffer(desc);
399 }
400 }
401 for (const auto& desc : info.texture_descriptors) {
402 texture_buffer_index += desc.count;
403 }
404 if constexpr (Spec::has_images) {
405 for (const auto& desc : info.image_descriptors) {
406 texture_buffer_index += desc.count;
407 }
408 }
409 }};
410 if constexpr (Spec::enabled_stages[0]) {
411 bind_stage_info(0);
412 }
413 if constexpr (Spec::enabled_stages[1]) {
414 bind_stage_info(1);
415 }
416 if constexpr (Spec::enabled_stages[2]) {
417 bind_stage_info(2);
418 }
419 if constexpr (Spec::enabled_stages[3]) {
420 bind_stage_info(3);
421 }
422 if constexpr (Spec::enabled_stages[4]) {
423 bind_stage_info(4);
424 }
425
426 buffer_cache.UpdateGraphicsBuffers(is_indexed);
427 buffer_cache.BindHostGeometryBuffers(is_indexed);
196 428
197 ci.codeSize = stage->code.size() * sizeof(u32); 429 update_descriptor_queue.Acquire();
198 ci.pCode = stage->code.data(); 430
199 shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); 431 const VkSampler* samplers_it{samplers.data()};
432 const ImageId* views_it{image_view_ids.data()};
433 const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
434 buffer_cache.BindHostStageBuffers(stage);
435 PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
436 update_descriptor_queue);
437 }};
438 if constexpr (Spec::enabled_stages[0]) {
439 prepare_stage(0);
440 }
441 if constexpr (Spec::enabled_stages[1]) {
442 prepare_stage(1);
200 } 443 }
201 return shader_modules; 444 if constexpr (Spec::enabled_stages[2]) {
445 prepare_stage(2);
446 }
447 if constexpr (Spec::enabled_stages[3]) {
448 prepare_stage(3);
449 }
450 if constexpr (Spec::enabled_stages[4]) {
451 prepare_stage(4);
452 }
453 ConfigureDraw();
202} 454}
203 455
204vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, 456void GraphicsPipeline::ConfigureDraw() {
205 VkRenderPass renderpass, 457 texture_cache.UpdateRenderTargets(false);
206 u32 num_color_buffers) const { 458 scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
207 const auto& state = cache_key.fixed_state; 459
208 const auto& viewport_swizzles = state.viewport_swizzles; 460 if (!is_built.load(std::memory_order::relaxed)) {
209 461 // Wait for the pipeline to be built
210 FixedPipelineState::DynamicState dynamic; 462 scheduler.Record([this](vk::CommandBuffer) {
211 if (device.IsExtExtendedDynamicStateSupported()) { 463 std::unique_lock lock{build_mutex};
212 // Insert dummy values, as long as they are valid they don't matter as extended dynamic 464 build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
213 // state is ignored
214 dynamic.raw1 = 0;
215 dynamic.raw2 = 0;
216 dynamic.vertex_strides.fill(0);
217 } else {
218 dynamic = state.dynamic_state;
219 }
220
221 std::vector<VkVertexInputBindingDescription> vertex_bindings;
222 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
223 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
224 const bool instanced = state.binding_divisors[index] != 0;
225 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
226 vertex_bindings.push_back({
227 .binding = static_cast<u32>(index),
228 .stride = dynamic.vertex_strides[index],
229 .inputRate = rate,
230 }); 465 });
231 if (instanced) {
232 vertex_binding_divisors.push_back({
233 .binding = static_cast<u32>(index),
234 .divisor = state.binding_divisors[index],
235 });
236 }
237 } 466 }
467 const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
468 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
469 scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
470 if (bind_pipeline) {
471 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
472 }
473 if (!descriptor_set_layout) {
474 return;
475 }
476 if (uses_push_descriptor) {
477 cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout,
478 0, descriptor_data);
479 } else {
480 const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
481 const vk::Device& dev{device.GetLogical()};
482 dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
483 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
484 descriptor_set, nullptr);
485 }
486 });
487}
238 488
239 std::vector<VkVertexInputAttributeDescription> vertex_attributes; 489void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
240 const auto& input_attributes = program[0]->entries.attributes; 490 FixedPipelineState::DynamicState dynamic{};
241 for (std::size_t index = 0; index < state.attributes.size(); ++index) { 491 if (!key.state.extended_dynamic_state) {
242 const auto& attribute = state.attributes[index]; 492 dynamic = key.state.dynamic_state;
243 if (!attribute.enabled) { 493 }
244 continue; 494 static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
495 static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
496 static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
497 if (key.state.dynamic_vertex_input) {
498 for (size_t index = 0; index < key.state.attributes.size(); ++index) {
499 const u32 type = key.state.DynamicAttributeType(index);
500 if (!stage_infos[0].loads.Generic(index) || type == 0) {
501 continue;
502 }
503 vertex_attributes.push_back({
504 .location = static_cast<u32>(index),
505 .binding = 0,
506 .format = type == 1 ? VK_FORMAT_R32_SFLOAT
507 : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT,
508 .offset = 0,
509 });
245 } 510 }
246 if (!input_attributes.contains(static_cast<u32>(index))) { 511 if (!vertex_attributes.empty()) {
247 // Skip attributes not used by the vertex shaders. 512 vertex_bindings.push_back({
248 continue; 513 .binding = 0,
514 .stride = 4,
515 .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
516 });
517 }
518 } else {
519 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
520 const bool instanced = key.state.binding_divisors[index] != 0;
521 const auto rate =
522 instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
523 vertex_bindings.push_back({
524 .binding = static_cast<u32>(index),
525 .stride = dynamic.vertex_strides[index],
526 .inputRate = rate,
527 });
528 if (instanced) {
529 vertex_binding_divisors.push_back({
530 .binding = static_cast<u32>(index),
531 .divisor = key.state.binding_divisors[index],
532 });
533 }
534 }
535 for (size_t index = 0; index < key.state.attributes.size(); ++index) {
536 const auto& attribute = key.state.attributes[index];
537 if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) {
538 continue;
539 }
540 vertex_attributes.push_back({
541 .location = static_cast<u32>(index),
542 .binding = attribute.buffer,
543 .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
544 .offset = attribute.offset,
545 });
249 } 546 }
250 vertex_attributes.push_back({
251 .location = static_cast<u32>(index),
252 .binding = attribute.buffer,
253 .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
254 .offset = attribute.offset,
255 });
256 } 547 }
257
258 VkPipelineVertexInputStateCreateInfo vertex_input_ci{ 548 VkPipelineVertexInputStateCreateInfo vertex_input_ci{
259 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 549 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
260 .pNext = nullptr, 550 .pNext = nullptr,
@@ -264,7 +554,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
264 .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), 554 .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
265 .pVertexAttributeDescriptions = vertex_attributes.data(), 555 .pVertexAttributeDescriptions = vertex_attributes.data(),
266 }; 556 };
267
268 const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ 557 const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
269 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, 558 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
270 .pNext = nullptr, 559 .pNext = nullptr,
@@ -274,78 +563,113 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
274 if (!vertex_binding_divisors.empty()) { 563 if (!vertex_binding_divisors.empty()) {
275 vertex_input_ci.pNext = &input_divisor_ci; 564 vertex_input_ci.pNext = &input_divisor_ci;
276 } 565 }
277 566 auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
278 const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); 567 if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
568 if (!spv_modules[1] && !spv_modules[2]) {
569 LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
570 input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
571 }
572 }
279 const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ 573 const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
280 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, 574 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
281 .pNext = nullptr, 575 .pNext = nullptr,
282 .flags = 0, 576 .flags = 0,
283 .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), 577 .topology = input_assembly_topology,
284 .primitiveRestartEnable = state.primitive_restart_enable != 0 && 578 .primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
285 SupportsPrimitiveRestart(input_assembly_topology), 579 SupportsPrimitiveRestart(input_assembly_topology),
286 }; 580 };
287
288 const VkPipelineTessellationStateCreateInfo tessellation_ci{ 581 const VkPipelineTessellationStateCreateInfo tessellation_ci{
289 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, 582 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
290 .pNext = nullptr, 583 .pNext = nullptr,
291 .flags = 0, 584 .flags = 0,
292 .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, 585 .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1,
293 };
294
295 VkPipelineViewportStateCreateInfo viewport_ci{
296 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
297 .pNext = nullptr,
298 .flags = 0,
299 .viewportCount = Maxwell::NumViewports,
300 .pViewports = nullptr,
301 .scissorCount = Maxwell::NumViewports,
302 .pScissors = nullptr,
303 }; 586 };
304 587
305 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; 588 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
306 std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); 589 std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
307 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ 590 const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
308 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, 591 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
309 .pNext = nullptr, 592 .pNext = nullptr,
310 .flags = 0, 593 .flags = 0,
311 .viewportCount = Maxwell::NumViewports, 594 .viewportCount = Maxwell::NumViewports,
312 .pViewportSwizzles = swizzles.data(), 595 .pViewportSwizzles = swizzles.data(),
313 }; 596 };
314 if (device.IsNvViewportSwizzleSupported()) { 597 const VkPipelineViewportStateCreateInfo viewport_ci{
315 viewport_ci.pNext = &swizzle_ci; 598 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
316 } 599 .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr,
600 .flags = 0,
601 .viewportCount = Maxwell::NumViewports,
602 .pViewports = nullptr,
603 .scissorCount = Maxwell::NumViewports,
604 .pScissors = nullptr,
605 };
317 606
318 const VkPipelineRasterizationStateCreateInfo rasterization_ci{ 607 VkPipelineRasterizationStateCreateInfo rasterization_ci{
319 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 608 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
320 .pNext = nullptr, 609 .pNext = nullptr,
321 .flags = 0, 610 .flags = 0,
322 .depthClampEnable = 611 .depthClampEnable =
323 static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), 612 static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
324 .rasterizerDiscardEnable = 613 .rasterizerDiscardEnable =
325 static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), 614 static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
326 .polygonMode = VK_POLYGON_MODE_FILL, 615 .polygonMode =
616 MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)),
327 .cullMode = static_cast<VkCullModeFlags>( 617 .cullMode = static_cast<VkCullModeFlags>(
328 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), 618 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
329 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), 619 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
330 .depthBiasEnable = state.depth_bias_enable, 620 .depthBiasEnable = key.state.depth_bias_enable,
331 .depthBiasConstantFactor = 0.0f, 621 .depthBiasConstantFactor = 0.0f,
332 .depthBiasClamp = 0.0f, 622 .depthBiasClamp = 0.0f,
333 .depthBiasSlopeFactor = 0.0f, 623 .depthBiasSlopeFactor = 0.0f,
334 .lineWidth = 1.0f, 624 .lineWidth = 1.0f,
335 }; 625 };
626 VkPipelineRasterizationLineStateCreateInfoEXT line_state{
627 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
628 .pNext = nullptr,
629 .lineRasterizationMode = key.state.smooth_lines != 0
630 ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT
631 : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT,
632 .stippledLineEnable = VK_FALSE, // TODO
633 .lineStippleFactor = 0,
634 .lineStipplePattern = 0,
635 };
636 VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{
637 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT,
638 .pNext = nullptr,
639 .flags = 0,
640 .conservativeRasterizationMode = key.state.conservative_raster_enable != 0
641 ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
642 : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT,
643 .extraPrimitiveOverestimationSize = 0.0f,
644 };
645 VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{
646 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
647 .pNext = nullptr,
648 .provokingVertexMode = key.state.provoking_vertex_last != 0
649 ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT
650 : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT,
651 };
652 if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) {
653 line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state);
654 }
655 if (device.IsExtConservativeRasterizationSupported()) {
656 conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster);
657 }
658 if (device.IsExtProvokingVertexSupported()) {
659 provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex);
660 }
336 661
337 const VkPipelineMultisampleStateCreateInfo multisample_ci{ 662 const VkPipelineMultisampleStateCreateInfo multisample_ci{
338 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 663 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
339 .pNext = nullptr, 664 .pNext = nullptr,
340 .flags = 0, 665 .flags = 0,
341 .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), 666 .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
342 .sampleShadingEnable = VK_FALSE, 667 .sampleShadingEnable = VK_FALSE,
343 .minSampleShading = 0.0f, 668 .minSampleShading = 0.0f,
344 .pSampleMask = nullptr, 669 .pSampleMask = nullptr,
345 .alphaToCoverageEnable = VK_FALSE, 670 .alphaToCoverageEnable = VK_FALSE,
346 .alphaToOneEnable = VK_FALSE, 671 .alphaToOneEnable = VK_FALSE,
347 }; 672 };
348
349 const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ 673 const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
350 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 674 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
351 .pNext = nullptr, 675 .pNext = nullptr,
@@ -355,32 +679,32 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
355 .depthCompareOp = dynamic.depth_test_enable 679 .depthCompareOp = dynamic.depth_test_enable
356 ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) 680 ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
357 : VK_COMPARE_OP_ALWAYS, 681 : VK_COMPARE_OP_ALWAYS,
358 .depthBoundsTestEnable = dynamic.depth_bounds_enable, 682 .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(),
359 .stencilTestEnable = dynamic.stencil_enable, 683 .stencilTestEnable = dynamic.stencil_enable,
360 .front = GetStencilFaceState(dynamic.front), 684 .front = GetStencilFaceState(dynamic.front),
361 .back = GetStencilFaceState(dynamic.back), 685 .back = GetStencilFaceState(dynamic.back),
362 .minDepthBounds = 0.0f, 686 .minDepthBounds = 0.0f,
363 .maxDepthBounds = 0.0f, 687 .maxDepthBounds = 0.0f,
364 }; 688 };
365 689 if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) {
366 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; 690 LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
367 for (std::size_t index = 0; index < num_color_buffers; ++index) { 691 }
368 static constexpr std::array COMPONENT_TABLE{ 692 static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
693 const size_t num_attachments{NumAttachments(key.state)};
694 for (size_t index = 0; index < num_attachments; ++index) {
695 static constexpr std::array mask_table{
369 VK_COLOR_COMPONENT_R_BIT, 696 VK_COLOR_COMPONENT_R_BIT,
370 VK_COLOR_COMPONENT_G_BIT, 697 VK_COLOR_COMPONENT_G_BIT,
371 VK_COLOR_COMPONENT_B_BIT, 698 VK_COLOR_COMPONENT_B_BIT,
372 VK_COLOR_COMPONENT_A_BIT, 699 VK_COLOR_COMPONENT_A_BIT,
373 }; 700 };
374 const auto& blend = state.attachments[index]; 701 const auto& blend{key.state.attachments[index]};
375 702 const std::array mask{blend.Mask()};
376 VkColorComponentFlags color_components = 0; 703 VkColorComponentFlags write_mask{};
377 for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { 704 for (size_t i = 0; i < mask_table.size(); ++i) {
378 if (blend.Mask()[i]) { 705 write_mask |= mask[i] ? mask_table[i] : 0;
379 color_components |= COMPONENT_TABLE[i];
380 }
381 } 706 }
382 707 cb_attachments.push_back({
383 cb_attachments[index] = {
384 .blendEnable = blend.enable != 0, 708 .blendEnable = blend.enable != 0,
385 .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), 709 .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
386 .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), 710 .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
@@ -388,28 +712,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
388 .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), 712 .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
389 .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), 713 .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
390 .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), 714 .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
391 .colorWriteMask = color_components, 715 .colorWriteMask = write_mask,
392 }; 716 });
393 } 717 }
394
395 const VkPipelineColorBlendStateCreateInfo color_blend_ci{ 718 const VkPipelineColorBlendStateCreateInfo color_blend_ci{
396 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 719 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
397 .pNext = nullptr, 720 .pNext = nullptr,
398 .flags = 0, 721 .flags = 0,
399 .logicOpEnable = VK_FALSE, 722 .logicOpEnable = VK_FALSE,
400 .logicOp = VK_LOGIC_OP_COPY, 723 .logicOp = VK_LOGIC_OP_COPY,
401 .attachmentCount = num_color_buffers, 724 .attachmentCount = static_cast<u32>(cb_attachments.size()),
402 .pAttachments = cb_attachments.data(), 725 .pAttachments = cb_attachments.data(),
403 .blendConstants = {}, 726 .blendConstants = {},
404 }; 727 };
405 728 static_vector<VkDynamicState, 19> dynamic_states{
406 std::vector dynamic_states{
407 VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, 729 VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
408 VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 730 VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
409 VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 731 VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
410 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 732 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
733 VK_DYNAMIC_STATE_LINE_WIDTH,
411 }; 734 };
412 if (device.IsExtExtendedDynamicStateSupported()) { 735 if (key.state.extended_dynamic_state) {
413 static constexpr std::array extended{ 736 static constexpr std::array extended{
414 VK_DYNAMIC_STATE_CULL_MODE_EXT, 737 VK_DYNAMIC_STATE_CULL_MODE_EXT,
415 VK_DYNAMIC_STATE_FRONT_FACE_EXT, 738 VK_DYNAMIC_STATE_FRONT_FACE_EXT,
@@ -421,9 +744,11 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
421 VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, 744 VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
422 VK_DYNAMIC_STATE_STENCIL_OP_EXT, 745 VK_DYNAMIC_STATE_STENCIL_OP_EXT,
423 }; 746 };
747 if (key.state.dynamic_vertex_input) {
748 dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
749 }
424 dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); 750 dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
425 } 751 }
426
427 const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ 752 const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
428 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, 753 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
429 .pNext = nullptr, 754 .pNext = nullptr,
@@ -431,34 +756,33 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
431 .dynamicStateCount = static_cast<u32>(dynamic_states.size()), 756 .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
432 .pDynamicStates = dynamic_states.data(), 757 .pDynamicStates = dynamic_states.data(),
433 }; 758 };
434 759 [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
435 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
436 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, 760 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
437 .pNext = nullptr, 761 .pNext = nullptr,
438 .requiredSubgroupSize = GuestWarpSize, 762 .requiredSubgroupSize = GuestWarpSize,
439 }; 763 };
440 764 static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages;
441 std::vector<VkPipelineShaderStageCreateInfo> shader_stages; 765 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
442 std::size_t module_index = 0; 766 if (!spv_modules[stage]) {
443 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
444 if (!program[stage]) {
445 continue; 767 continue;
446 } 768 }
447 769 [[maybe_unused]] auto& stage_ci =
448 VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); 770 shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
449 stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 771 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
450 stage_ci.pNext = nullptr; 772 .pNext = nullptr,
451 stage_ci.flags = 0; 773 .flags = 0,
452 stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); 774 .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)),
453 stage_ci.module = *modules[module_index++]; 775 .module = *spv_modules[stage],
454 stage_ci.pName = "main"; 776 .pName = "main",
455 stage_ci.pSpecializationInfo = nullptr; 777 .pSpecializationInfo = nullptr,
456 778 });
779 /*
457 if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { 780 if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
458 stage_ci.pNext = &subgroup_size_ci; 781 stage_ci.pNext = &subgroup_size_ci;
459 } 782 }
783 */
460 } 784 }
461 return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ 785 pipeline = device.GetLogical().CreateGraphicsPipeline({
462 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 786 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
463 .pNext = nullptr, 787 .pNext = nullptr,
464 .flags = 0, 788 .flags = 0,
@@ -473,12 +797,31 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
473 .pDepthStencilState = &depth_stencil_ci, 797 .pDepthStencilState = &depth_stencil_ci,
474 .pColorBlendState = &color_blend_ci, 798 .pColorBlendState = &color_blend_ci,
475 .pDynamicState = &dynamic_state_ci, 799 .pDynamicState = &dynamic_state_ci,
476 .layout = *layout, 800 .layout = *pipeline_layout,
477 .renderPass = renderpass, 801 .renderPass = render_pass,
478 .subpass = 0, 802 .subpass = 0,
479 .basePipelineHandle = nullptr, 803 .basePipelineHandle = nullptr,
480 .basePipelineIndex = 0, 804 .basePipelineIndex = 0,
481 }); 805 });
482} 806}
483 807
808void GraphicsPipeline::Validate() {
809 size_t num_images{};
810 for (const auto& info : stage_infos) {
811 for (const auto& desc : info.texture_buffer_descriptors) {
812 num_images += desc.count;
813 }
814 for (const auto& desc : info.image_buffer_descriptors) {
815 num_images += desc.count;
816 }
817 for (const auto& desc : info.texture_descriptors) {
818 num_images += desc.count;
819 }
820 for (const auto& desc : info.image_descriptors) {
821 num_images += desc.count;
822 }
823 }
824 ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
825}
826
484} // namespace Vulkan 827} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 8b6a98fe0..2bd48d697 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -1,30 +1,36 @@
1// Copyright 2019 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
7#include <array> 8#include <array>
8#include <optional> 9#include <atomic>
9#include <vector> 10#include <condition_variable>
11#include <mutex>
12#include <type_traits>
10 13
11#include "common/common_types.h" 14#include "common/thread_worker.h"
15#include "shader_recompiler/shader_info.h"
12#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 17#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
18#include "video_core/renderer_vulkan/vk_buffer_cache.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 19#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
15#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 20#include "video_core/renderer_vulkan/vk_texture_cache.h"
16#include "video_core/vulkan_common/vulkan_wrapper.h" 21#include "video_core/vulkan_common/vulkan_wrapper.h"
17 22
18namespace Vulkan { 23namespace VideoCore {
24class ShaderNotify;
25}
19 26
20using Maxwell = Tegra::Engines::Maxwell3D::Regs; 27namespace Vulkan {
21 28
22struct GraphicsPipelineCacheKey { 29struct GraphicsPipelineCacheKey {
23 VkRenderPass renderpass; 30 std::array<u64, 6> unique_hashes;
24 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; 31 FixedPipelineState state;
25 FixedPipelineState fixed_state;
26 32
27 std::size_t Hash() const noexcept; 33 size_t Hash() const noexcept;
28 34
29 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; 35 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
30 36
@@ -32,72 +38,115 @@ struct GraphicsPipelineCacheKey {
32 return !operator==(rhs); 38 return !operator==(rhs);
33 } 39 }
34 40
35 std::size_t Size() const noexcept { 41 size_t Size() const noexcept {
36 return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); 42 return sizeof(unique_hashes) + state.Size();
37 } 43 }
38}; 44};
39static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); 45static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
40static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); 46static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
41static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); 47static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
42 48
49} // namespace Vulkan
50
51namespace std {
52template <>
53struct hash<Vulkan::GraphicsPipelineCacheKey> {
54 size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
55 return k.Hash();
56 }
57};
58} // namespace std
59
60namespace Vulkan {
61
43class Device; 62class Device;
44class VKDescriptorPool; 63class RenderPassCache;
45class VKScheduler; 64class VKScheduler;
46class VKUpdateDescriptorQueue; 65class VKUpdateDescriptorQueue;
47 66
48using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; 67class GraphicsPipeline {
68 static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
49 69
50class VKGraphicsPipeline final {
51public: 70public:
52 explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, 71 explicit GraphicsPipeline(
53 VKDescriptorPool& descriptor_pool, 72 Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
54 VKUpdateDescriptorQueue& update_descriptor_queue_, 73 VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
55 const GraphicsPipelineCacheKey& key, 74 VideoCore::ShaderNotify* shader_notify, const Device& device,
56 vk::Span<VkDescriptorSetLayoutBinding> bindings, 75 DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue,
57 const SPIRVProgram& program, u32 num_color_buffers); 76 Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache,
58 ~VKGraphicsPipeline(); 77 const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
59 78 const std::array<const Shader::Info*, NUM_STAGES>& infos);
60 VkDescriptorSet CommitDescriptorSet(); 79
61 80 GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
62 VkPipeline GetHandle() const { 81 GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
63 return *pipeline; 82
83 GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
84 GraphicsPipeline(const GraphicsPipeline&) = delete;
85
86 void AddTransition(GraphicsPipeline* transition);
87
88 void Configure(bool is_indexed) {
89 configure_func(this, is_indexed);
64 } 90 }
65 91
66 VkPipelineLayout GetLayout() const { 92 [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
67 return *layout; 93 if (key == current_key) {
94 return this;
95 }
96 const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)};
97 return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)]
98 : nullptr;
68 } 99 }
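Next() above resolves the incoming key against a per-pipeline transition list before falling back to the global hash map. A self-contained sketch of that parallel-vector lookup, with Key and CachedPipeline as illustrative stand-ins for the real cache key and pipeline types:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Key {
    int value = 0;
    bool operator==(const Key&) const = default;
};

struct CachedPipeline {
    Key key;
    std::vector<Key> transition_keys;          // keys previously seen to follow this one
    std::vector<CachedPipeline*> transitions;  // pipelines those keys resolved to

    CachedPipeline* Next(const Key& current_key) {
        if (key == current_key) {
            return this; // state did not change, reuse the current pipeline
        }
        const auto it = std::find(transition_keys.begin(), transition_keys.end(), current_key);
        if (it == transition_keys.end()) {
            return nullptr; // unknown transition, caller takes the slow path
        }
        const auto index = static_cast<std::size_t>(std::distance(transition_keys.begin(), it));
        return transitions[index];
    }
};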
69 100
70 GraphicsPipelineCacheKey GetCacheKey() const { 101 [[nodiscard]] bool IsBuilt() const noexcept {
71 return cache_key; 102 return is_built.load(std::memory_order::relaxed);
72 } 103 }
73 104
74private: 105 template <typename Spec>
75 vk::DescriptorSetLayout CreateDescriptorSetLayout( 106 static auto MakeConfigureSpecFunc() {
76 vk::Span<VkDescriptorSetLayoutBinding> bindings) const; 107 return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
108 }
77 109
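MakeConfigureSpecFunc() above returns a captureless lambda that decays to a plain function pointer, so a single stored pointer can dispatch to the templated ConfigureImpl<Spec> without virtual calls. A hedged sketch of the same pattern with hypothetical types and Spec tags:

#include <cstdio>

class Widget {
public:
    template <typename Spec>
    static auto MakeConfigureFunc() {
        // Captureless lambda: convertible to void (*)(Widget*, bool).
        return [](Widget* w, bool flag) { w->ConfigureImpl<Spec>(flag); };
    }

    void SetConfigureFunc(void (*func)(Widget*, bool)) {
        configure_func = func;
    }

    void Configure(bool flag) {
        configure_func(this, flag);
    }

private:
    template <typename Spec>
    void ConfigureImpl(bool flag) {
        std::printf("configured as %s, flag=%d\n", Spec::name, flag ? 1 : 0);
    }

    void (*configure_func)(Widget*, bool) = nullptr;
};

struct FastSpec { static constexpr const char* name = "fast"; };
struct SafeSpec { static constexpr const char* name = "safe"; };

int main() {
    Widget w;
    w.SetConfigureFunc(Widget::MakeConfigureFunc<FastSpec>());
    w.Configure(true);
    w.SetConfigureFunc(Widget::MakeConfigureFunc<SafeSpec>());
    w.Configure(false);
}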
78 vk::PipelineLayout CreatePipelineLayout() const; 110private:
111 template <typename Spec>
112 void ConfigureImpl(bool is_indexed);
79 113
80 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( 114 void ConfigureDraw();
81 const SPIRVProgram& program) const;
82 115
83 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; 116 void MakePipeline(VkRenderPass render_pass);
84 117
85 vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, 118 void Validate();
86 u32 num_color_buffers) const;
87 119
120 const GraphicsPipelineCacheKey key;
121 Tegra::Engines::Maxwell3D& maxwell3d;
122 Tegra::MemoryManager& gpu_memory;
88 const Device& device; 123 const Device& device;
124 TextureCache& texture_cache;
125 BufferCache& buffer_cache;
89 VKScheduler& scheduler; 126 VKScheduler& scheduler;
90 const GraphicsPipelineCacheKey cache_key; 127 VKUpdateDescriptorQueue& update_descriptor_queue;
91 const u64 hash; 128
129 void (*configure_func)(GraphicsPipeline*, bool){};
130
131 std::vector<GraphicsPipelineCacheKey> transition_keys;
132 std::vector<GraphicsPipeline*> transitions;
133
134 std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
135
136 std::array<Shader::Info, NUM_STAGES> stage_infos;
137 std::array<u32, 5> enabled_uniform_buffer_masks{};
138 VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
92 139
93 vk::DescriptorSetLayout descriptor_set_layout; 140 vk::DescriptorSetLayout descriptor_set_layout;
94 DescriptorAllocator descriptor_allocator; 141 DescriptorAllocator descriptor_allocator;
95 VKUpdateDescriptorQueue& update_descriptor_queue; 142 vk::PipelineLayout pipeline_layout;
96 vk::PipelineLayout layout; 143 vk::DescriptorUpdateTemplateKHR descriptor_update_template;
97 vk::DescriptorUpdateTemplateKHR descriptor_template;
98 std::vector<vk::ShaderModule> modules;
99
100 vk::Pipeline pipeline; 144 vk::Pipeline pipeline;
145
146 std::condition_variable build_condvar;
147 std::mutex build_mutex;
148 std::atomic_bool is_built{false};
149 bool uses_push_descriptor{false};
101}; 150};
102 151
103} // namespace Vulkan 152} // namespace Vulkan
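The std::hash specialization added in this header lets GraphicsPipelineCacheKey act directly as an unordered_map key. A generic sketch of that pattern, assuming a trivially copyable key with unique object representations (the field layout and Hash() below are illustrative, not the project's):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <string_view>
#include <unordered_map>

namespace Example {
struct CacheKey {
    std::uint64_t a = 0;
    std::uint64_t b = 0;

    std::size_t Hash() const noexcept {
        // Hashing the raw bytes is only sound because the type has no padding and is
        // trivially copyable, mirroring the static_asserts on the real key type.
        return std::hash<std::string_view>{}(
            std::string_view(reinterpret_cast<const char*>(this), sizeof *this));
    }

    bool operator==(const CacheKey& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
} // namespace Example

namespace std {
template <>
struct hash<Example::CacheKey> {
    size_t operator()(const Example::CacheKey& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std

int main() {
    std::unordered_map<Example::CacheKey, int> cache;
    cache.emplace(Example::CacheKey{.a = 1, .b = 2}, 42);
    return cache.size() == 1 ? 0 : 1;
}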
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index ee3cd35d0..4f8688118 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -39,9 +39,9 @@ public:
39 return KnownGpuTick() >= tick; 39 return KnownGpuTick() >= tick;
40 } 40 }
41 41
42 /// Advance to the logical tick. 42 /// Advance to the logical tick and return the old one
43 void NextTick() noexcept { 43 [[nodiscard]] u64 NextTick() noexcept {
44 ++current_tick; 44 return current_tick.fetch_add(1, std::memory_order::relaxed);
45 } 45 }
46 46
47 /// Refresh the known GPU tick 47 /// Refresh the known GPU tick
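The change above makes NextTick() return the tick that was current before the increment. A tiny sketch of the fetch_add semantics this relies on:

#include <atomic>
#include <cassert>
#include <cstdint>

int main() {
    std::atomic<std::uint64_t> current_tick{5};
    // fetch_add returns the value held before the addition.
    const std::uint64_t previous = current_tick.fetch_add(1, std::memory_order::relaxed);
    assert(previous == 5);
    assert(current_tick.load() == 6);
}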
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8991505ca..57b163247 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -4,444 +4,613 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <fstream>
7#include <memory> 8#include <memory>
9#include <thread>
8#include <vector> 10#include <vector>
9 11
10#include "common/bit_cast.h" 12#include "common/bit_cast.h"
11#include "common/cityhash.h" 13#include "common/cityhash.h"
14#include "common/fs/fs.h"
15#include "common/fs/path_util.h"
12#include "common/microprofile.h" 16#include "common/microprofile.h"
17#include "common/thread_worker.h"
13#include "core/core.h" 18#include "core/core.h"
14#include "core/memory.h" 19#include "core/memory.h"
20#include "shader_recompiler/backend/spirv/emit_spirv.h"
21#include "shader_recompiler/environment.h"
22#include "shader_recompiler/frontend/maxwell/control_flow.h"
23#include "shader_recompiler/frontend/maxwell/translate_program.h"
24#include "shader_recompiler/program_header.h"
25#include "video_core/dirty_flags.h"
15#include "video_core/engines/kepler_compute.h" 26#include "video_core/engines/kepler_compute.h"
16#include "video_core/engines/maxwell_3d.h" 27#include "video_core/engines/maxwell_3d.h"
17#include "video_core/memory_manager.h" 28#include "video_core/memory_manager.h"
18#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 29#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
19#include "video_core/renderer_vulkan/maxwell_to_vk.h" 30#include "video_core/renderer_vulkan/maxwell_to_vk.h"
31#include "video_core/renderer_vulkan/pipeline_helper.h"
20#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 32#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
21#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 33#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 34#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h" 35#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_scheduler.h" 36#include "video_core/renderer_vulkan/vk_scheduler.h"
37#include "video_core/renderer_vulkan/vk_shader_util.h"
26#include "video_core/renderer_vulkan/vk_update_descriptor.h" 38#include "video_core/renderer_vulkan/vk_update_descriptor.h"
27#include "video_core/shader/compiler_settings.h"
28#include "video_core/shader/memory_util.h"
29#include "video_core/shader_cache.h" 39#include "video_core/shader_cache.h"
40#include "video_core/shader_environment.h"
30#include "video_core/shader_notify.h" 41#include "video_core/shader_notify.h"
31#include "video_core/vulkan_common/vulkan_device.h" 42#include "video_core/vulkan_common/vulkan_device.h"
32#include "video_core/vulkan_common/vulkan_wrapper.h" 43#include "video_core/vulkan_common/vulkan_wrapper.h"
33 44
34namespace Vulkan { 45namespace Vulkan {
35
36MICROPROFILE_DECLARE(Vulkan_PipelineCache); 46MICROPROFILE_DECLARE(Vulkan_PipelineCache);
37 47
38using Tegra::Engines::ShaderType;
39using VideoCommon::Shader::GetShaderAddress;
40using VideoCommon::Shader::GetShaderCode;
41using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
42using VideoCommon::Shader::ProgramCode;
43using VideoCommon::Shader::STAGE_MAIN_OFFSET;
44
45namespace { 48namespace {
49using Shader::Backend::SPIRV::EmitSPIRV;
50using Shader::Maxwell::MergeDualVertexPrograms;
51using Shader::Maxwell::TranslateProgram;
52using VideoCommon::ComputeEnvironment;
53using VideoCommon::FileEnvironment;
54using VideoCommon::GenericEnvironment;
55using VideoCommon::GraphicsEnvironment;
56
57constexpr u32 CACHE_VERSION = 5;
58
59template <typename Container>
60auto MakeSpan(Container& container) {
61 return std::span(container.data(), container.size());
62}
46 63
47constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; 64Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
48constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 65 switch (comparison) {
49constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; 66 case Maxwell::ComparisonOp::Never:
50constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 67 case Maxwell::ComparisonOp::NeverOld:
51constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; 68 return Shader::CompareFunction::Never;
52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; 69 case Maxwell::ComparisonOp::Less:
53 70 case Maxwell::ComparisonOp::LessOld:
54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 71 return Shader::CompareFunction::Less;
55 .depth = VideoCommon::Shader::CompileDepth::FullDecompile, 72 case Maxwell::ComparisonOp::Equal:
56 .disable_else_derivation = true, 73 case Maxwell::ComparisonOp::EqualOld:
57}; 74 return Shader::CompareFunction::Equal;
58 75 case Maxwell::ComparisonOp::LessEqual:
59constexpr std::size_t GetStageFromProgram(std::size_t program) { 76 case Maxwell::ComparisonOp::LessEqualOld:
60 return program == 0 ? 0 : program - 1; 77 return Shader::CompareFunction::LessThanEqual;
78 case Maxwell::ComparisonOp::Greater:
79 case Maxwell::ComparisonOp::GreaterOld:
80 return Shader::CompareFunction::Greater;
81 case Maxwell::ComparisonOp::NotEqual:
82 case Maxwell::ComparisonOp::NotEqualOld:
83 return Shader::CompareFunction::NotEqual;
84 case Maxwell::ComparisonOp::GreaterEqual:
85 case Maxwell::ComparisonOp::GreaterEqualOld:
86 return Shader::CompareFunction::GreaterThanEqual;
87 case Maxwell::ComparisonOp::Always:
88 case Maxwell::ComparisonOp::AlwaysOld:
89 return Shader::CompareFunction::Always;
90 }
91 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
92 return {};
61} 93}
62 94
63constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { 95Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
64 return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); 96 if (attr.enabled == 0) {
97 return Shader::AttributeType::Disabled;
98 }
99 switch (attr.Type()) {
100 case Maxwell::VertexAttribute::Type::SignedNorm:
101 case Maxwell::VertexAttribute::Type::UnsignedNorm:
102 case Maxwell::VertexAttribute::Type::UnsignedScaled:
103 case Maxwell::VertexAttribute::Type::SignedScaled:
104 case Maxwell::VertexAttribute::Type::Float:
105 return Shader::AttributeType::Float;
106 case Maxwell::VertexAttribute::Type::SignedInt:
107 return Shader::AttributeType::SignedInt;
108 case Maxwell::VertexAttribute::Type::UnsignedInt:
109 return Shader::AttributeType::UnsignedInt;
110 }
111 return Shader::AttributeType::Float;
65} 112}
66 113
67ShaderType GetShaderType(Maxwell::ShaderProgram program) { 114Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) {
68 switch (program) { 115 switch (state.DynamicAttributeType(index)) {
69 case Maxwell::ShaderProgram::VertexB: 116 case 0:
70 return ShaderType::Vertex; 117 return Shader::AttributeType::Disabled;
71 case Maxwell::ShaderProgram::TesselationControl: 118 case 1:
72 return ShaderType::TesselationControl; 119 return Shader::AttributeType::Float;
73 case Maxwell::ShaderProgram::TesselationEval: 120 case 2:
74 return ShaderType::TesselationEval; 121 return Shader::AttributeType::SignedInt;
75 case Maxwell::ShaderProgram::Geometry: 122 case 3:
76 return ShaderType::Geometry; 123 return Shader::AttributeType::UnsignedInt;
77 case Maxwell::ShaderProgram::Fragment:
78 return ShaderType::Fragment;
79 default:
80 UNIMPLEMENTED_MSG("program={}", program);
81 return ShaderType::Vertex;
82 } 124 }
125 return Shader::AttributeType::Disabled;
83} 126}
84 127
85template <VkDescriptorType descriptor_type, class Container> 128Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs,
86void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, 129 const GraphicsPipelineCacheKey& key,
87 VkShaderStageFlags stage_flags, const Container& container) { 130 const Shader::IR::Program& program,
88 const u32 num_entries = static_cast<u32>(std::size(container)); 131 const Shader::IR::Program* previous_program) {
89 for (std::size_t i = 0; i < num_entries; ++i) { 132 Shader::RuntimeInfo info;
90 u32 count = 1; 133 if (previous_program) {
91 if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { 134 info.previous_stage_stores = previous_program->info.stores;
92 // Combined image samplers can be arrayed. 135 if (previous_program->is_geometry_passthrough) {
93 count = container[i].size; 136 info.previous_stage_stores.mask |= previous_program->info.passthrough.mask;
94 } 137 }
95 bindings.push_back({ 138 } else {
96 .binding = binding++, 139 info.previous_stage_stores.mask.set();
97 .descriptorType = descriptor_type, 140 }
98 .descriptorCount = count, 141 const Shader::Stage stage{program.stage};
99 .stageFlags = stage_flags, 142 const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough};
100 .pImmutableSamplers = nullptr, 143 const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
101 }); 144 const float point_size{Common::BitCast<float>(key.state.point_size)};
145 switch (stage) {
146 case Shader::Stage::VertexB:
147 if (!has_geometry) {
148 if (key.state.topology == Maxwell::PrimitiveTopology::Points) {
149 info.fixed_state_point_size = point_size;
150 }
151 if (key.state.xfb_enabled) {
152 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
153 }
154 info.convert_depth_mode = gl_ndc;
155 }
156 if (key.state.dynamic_vertex_input) {
157 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
158 info.generic_input_types[index] = AttributeType(key.state, index);
159 }
160 } else {
161 std::ranges::transform(key.state.attributes, info.generic_input_types.begin(),
162 &CastAttributeType);
163 }
164 break;
165 case Shader::Stage::TessellationEval:
166 // We have to flip tessellation clockwise for some reason...
167 info.tess_clockwise = key.state.tessellation_clockwise == 0;
168 info.tess_primitive = [&key] {
169 const u32 raw{key.state.tessellation_primitive.Value()};
170 switch (static_cast<Maxwell::TessellationPrimitive>(raw)) {
171 case Maxwell::TessellationPrimitive::Isolines:
172 return Shader::TessPrimitive::Isolines;
173 case Maxwell::TessellationPrimitive::Triangles:
174 return Shader::TessPrimitive::Triangles;
175 case Maxwell::TessellationPrimitive::Quads:
176 return Shader::TessPrimitive::Quads;
177 }
178 UNREACHABLE();
179 return Shader::TessPrimitive::Triangles;
180 }();
181 info.tess_spacing = [&] {
182 const u32 raw{key.state.tessellation_spacing};
183 switch (static_cast<Maxwell::TessellationSpacing>(raw)) {
184 case Maxwell::TessellationSpacing::Equal:
185 return Shader::TessSpacing::Equal;
186 case Maxwell::TessellationSpacing::FractionalOdd:
187 return Shader::TessSpacing::FractionalOdd;
188 case Maxwell::TessellationSpacing::FractionalEven:
189 return Shader::TessSpacing::FractionalEven;
190 }
191 UNREACHABLE();
192 return Shader::TessSpacing::Equal;
193 }();
194 break;
195 case Shader::Stage::Geometry:
196 if (program.output_topology == Shader::OutputTopology::PointList) {
197 info.fixed_state_point_size = point_size;
198 }
199 if (key.state.xfb_enabled != 0) {
200 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
201 }
202 info.convert_depth_mode = gl_ndc;
203 break;
204 case Shader::Stage::Fragment:
205 info.alpha_test_func = MaxwellToCompareFunction(
206 key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
207 info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref);
208 break;
209 default:
210 break;
211 }
212 switch (key.state.topology) {
213 case Maxwell::PrimitiveTopology::Points:
214 info.input_topology = Shader::InputTopology::Points;
215 break;
216 case Maxwell::PrimitiveTopology::Lines:
217 case Maxwell::PrimitiveTopology::LineLoop:
218 case Maxwell::PrimitiveTopology::LineStrip:
219 info.input_topology = Shader::InputTopology::Lines;
220 break;
221 case Maxwell::PrimitiveTopology::Triangles:
222 case Maxwell::PrimitiveTopology::TriangleStrip:
223 case Maxwell::PrimitiveTopology::TriangleFan:
224 case Maxwell::PrimitiveTopology::Quads:
225 case Maxwell::PrimitiveTopology::QuadStrip:
226 case Maxwell::PrimitiveTopology::Polygon:
227 case Maxwell::PrimitiveTopology::Patches:
228 info.input_topology = Shader::InputTopology::Triangles;
229 break;
230 case Maxwell::PrimitiveTopology::LinesAdjacency:
231 case Maxwell::PrimitiveTopology::LineStripAdjacency:
232 info.input_topology = Shader::InputTopology::LinesAdjacency;
233 break;
234 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
235 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
236 info.input_topology = Shader::InputTopology::TrianglesAdjacency;
237 break;
102 } 238 }
239 info.force_early_z = key.state.early_z != 0;
240 info.y_negate = key.state.y_negate != 0;
241 return info;
103} 242}
243} // Anonymous namespace
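MakeRuntimeInfo above initializes several fields through immediately invoked lambdas so a switch can produce a value without a mutable temporary. A small sketch of that idiom with placeholder enums standing in for the Maxwell and Shader types:

enum class RawSpacing { Equal, FractionalOdd, FractionalEven };
enum class Spacing { Equal, FractionalOdd, FractionalEven };

Spacing ConvertSpacing(RawSpacing raw) {
    // The lambda is invoked in place; 'spacing' is initialized exactly once.
    const Spacing spacing = [&] {
        switch (raw) {
        case RawSpacing::Equal:
            return Spacing::Equal;
        case RawSpacing::FractionalOdd:
            return Spacing::FractionalOdd;
        case RawSpacing::FractionalEven:
            return Spacing::FractionalEven;
        }
        return Spacing::Equal; // defensive fallback, mirrors the UNREACHABLE() path
    }();
    return spacing;
}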
104 244
105u32 FillDescriptorLayout(const ShaderEntries& entries, 245size_t ComputePipelineCacheKey::Hash() const noexcept {
106 std::vector<VkDescriptorSetLayoutBinding>& bindings, 246 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
107 Maxwell::ShaderProgram program_type, u32 base_binding) { 247 return static_cast<size_t>(hash);
108 const ShaderType stage = GetStageFromProgram(program_type);
109 const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
110
111 u32 binding = base_binding;
112 AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
113 AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
114 AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
115 AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
116 AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
117 AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
118 return binding;
119} 248}
120 249
121} // Anonymous namespace 250bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
251 return std::memcmp(&rhs, this, sizeof *this) == 0;
252}
122 253
123std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { 254size_t GraphicsPipelineCacheKey::Hash() const noexcept {
124 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); 255 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
125 return static_cast<std::size_t>(hash); 256 return static_cast<size_t>(hash);
126} 257}
127 258
128bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { 259bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
129 return std::memcmp(&rhs, this, Size()) == 0; 260 return std::memcmp(&rhs, this, Size()) == 0;
130} 261}
131 262
132std::size_t ComputePipelineCacheKey::Hash() const noexcept { 263PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
133 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); 264 Tegra::Engines::KeplerCompute& kepler_compute_,
134 return static_cast<std::size_t>(hash); 265 Tegra::MemoryManager& gpu_memory_, const Device& device_,
135} 266 VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
136 267 VKUpdateDescriptorQueue& update_descriptor_queue_,
137bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { 268 RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
138 return std::memcmp(&rhs, this, sizeof *this) == 0; 269 TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
270 : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
271 device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
272 update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
273 buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
274 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
275 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
276 serialization_thread(1, "yuzu:PipelineSerialization") {
277 const auto& float_control{device.FloatControlProperties()};
278 const VkDriverIdKHR driver_id{device.GetDriverID()};
279 profile = Shader::Profile{
280 .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
281 .unified_descriptor_binding = true,
282 .support_descriptor_aliasing = true,
283 .support_int8 = true,
284 .support_int16 = device.IsShaderInt16Supported(),
285 .support_int64 = device.IsShaderInt64Supported(),
286 .support_vertex_instance_id = false,
287 .support_float_controls = true,
288 .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
289 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
290 .support_separate_rounding_mode =
291 float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
292 .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
293 .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
294 .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
295 .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
296 .support_fp16_signed_zero_nan_preserve =
297 float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
298 .support_fp32_signed_zero_nan_preserve =
299 float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
300 .support_fp64_signed_zero_nan_preserve =
301 float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
302 .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
303 .support_vote = true,
304 .support_viewport_index_layer_non_geometry =
305 device.IsExtShaderViewportIndexLayerSupported(),
306 .support_viewport_mask = device.IsNvViewportArray2Supported(),
307 .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
308 .support_demote_to_helper_invocation = true,
309 .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
310 .support_derivative_control = true,
311 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
312
313 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
314
315 .lower_left_origin_mode = false,
316 .need_declared_frag_colors = false,
317
318 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
319 .has_broken_unsigned_image_offsets = false,
320 .has_broken_signed_operations = false,
321 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR,
322 .ignore_nan_fp_comparisons = false,
323 };
324 host_info = Shader::HostTranslateInfo{
325 .support_float16 = device.IsFloat16Supported(),
326 .support_int64 = device.IsShaderInt64Supported(),
327 };
139} 328}
140 329
141Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, 330PipelineCache::~PipelineCache() = default;
142 GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
143 : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
144 shader_ir(program_code, main_offset_, compiler_settings, registry),
145 entries(GenerateShaderEntries(shader_ir)) {}
146
147Shader::~Shader() = default;
148
149VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
150 Tegra::Engines::Maxwell3D& maxwell3d_,
151 Tegra::Engines::KeplerCompute& kepler_compute_,
152 Tegra::MemoryManager& gpu_memory_, const Device& device_,
153 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
154 VKUpdateDescriptorQueue& update_descriptor_queue_)
155 : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
156 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
157 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
158 update_descriptor_queue_} {}
159
160VKPipelineCache::~VKPipelineCache() = default;
161 331
162std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { 332GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
163 std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; 333 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
164
165 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
166 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
167
168 // Skip stages that are not enabled
169 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
170 continue;
171 }
172
173 const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
174 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
175 ASSERT(cpu_addr);
176
177 Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
178 if (!result) {
179 const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
180
181 // No shader found - create a new one
182 static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
183 const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
184 ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
185 const std::size_t size_in_bytes = code.size() * sizeof(u64);
186
187 auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
188 std::move(code), stage_offset);
189 result = shader.get();
190 334
191 if (cpu_addr) { 335 if (!RefreshStages(graphics_key.unique_hashes)) {
192 Register(std::move(shader), *cpu_addr, size_in_bytes); 336 current_pipeline = nullptr;
193 } else { 337 return nullptr;
194 null_shader = std::move(shader); 338 }
195 } 339 graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
340 device.IsExtVertexInputDynamicStateSupported());
341
342 if (current_pipeline) {
343 GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
344 if (next) {
345 current_pipeline = next;
346 return BuiltPipeline(current_pipeline);
196 } 347 }
197 shaders[index] = result;
198 } 348 }
199 return last_shaders = shaders; 349 return CurrentGraphicsPipelineSlowPath();
200} 350}
201 351
202VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( 352ComputePipeline* PipelineCache::CurrentComputePipeline() {
203 const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
204 VideoCommon::Shader::AsyncShaders& async_shaders) {
205 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 353 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
206 354
207 if (last_graphics_pipeline && last_graphics_key == key) { 355 const ShaderInfo* const shader{ComputeShader()};
208 return last_graphics_pipeline; 356 if (!shader) {
209 } 357 return nullptr;
210 last_graphics_key = key;
211
212 if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
213 std::unique_lock lock{pipeline_cache};
214 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
215 if (is_cache_miss) {
216 gpu.ShaderNotify().MarkSharderBuilding();
217 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
218 const auto [program, bindings] = DecompileShaders(key.fixed_state);
219 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
220 update_descriptor_queue, bindings, program, key,
221 num_color_buffers);
222 }
223 last_graphics_pipeline = pair->second.get();
224 return last_graphics_pipeline;
225 } 358 }
226 359 const auto& qmd{kepler_compute.launch_description};
227 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); 360 const ComputePipelineCacheKey key{
228 auto& entry = pair->second; 361 .unique_hash = shader->unique_hash,
229 if (is_cache_miss) { 362 .shared_memory_size = qmd.shared_alloc,
230 gpu.ShaderNotify().MarkSharderBuilding(); 363 .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
231 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 364 };
232 const auto [program, bindings] = DecompileShaders(key.fixed_state); 365 const auto [pair, is_new]{compute_cache.try_emplace(key)};
233 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, 366 auto& pipeline{pair->second};
234 update_descriptor_queue, key, bindings, 367 if (!is_new) {
235 program, num_color_buffers); 368 return pipeline.get();
236 gpu.ShaderNotify().MarkShaderComplete();
237 } 369 }
238 last_graphics_pipeline = entry.get(); 370 pipeline = CreateComputePipeline(key, shader);
239 return last_graphics_pipeline; 371 return pipeline.get();
240} 372}
241 373
242VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { 374void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
243 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 375 const VideoCore::DiskResourceLoadCallback& callback) {
244 376 if (title_id == 0) {
245 const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); 377 return;
246 auto& entry = pair->second;
247 if (!is_cache_miss) {
248 return *entry;
249 } 378 }
250 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 379 const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
251 380 const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
252 const GPUVAddr gpu_addr = key.shader; 381 if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
253 382 LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
254 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 383 return;
255 ASSERT(cpu_addr); 384 }
385 pipeline_cache_filename = base_dir / "vulkan.bin";
386
387 struct {
388 std::mutex mutex;
389 size_t total{};
390 size_t built{};
391 bool has_loaded{};
392 } state;
393
394 const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
395 ComputePipelineCacheKey key;
396 file.read(reinterpret_cast<char*>(&key), sizeof(key));
397
398 workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
399 ShaderPools pools;
400 auto pipeline{CreateComputePipeline(pools, key, env, false)};
401 std::lock_guard lock{state.mutex};
402 if (pipeline) {
403 compute_cache.emplace(key, std::move(pipeline));
404 }
405 ++state.built;
406 if (state.has_loaded) {
407 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
408 }
409 });
410 ++state.total;
411 }};
412 const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported();
413 const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported();
414 const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
415 GraphicsPipelineCacheKey key;
416 file.read(reinterpret_cast<char*>(&key), sizeof(key));
417
418 if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state ||
419 (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) {
420 return;
421 }
422 workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
423 ShaderPools pools;
424 boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
425 for (auto& env : envs) {
426 env_ptrs.push_back(&env);
427 }
428 auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
256 429
257 Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); 430 std::lock_guard lock{state.mutex};
258 if (!shader) { 431 graphics_cache.emplace(key, std::move(pipeline));
259 // No shader found - create a new one 432 ++state.built;
260 const auto host_ptr = gpu_memory.GetPointer(gpu_addr); 433 if (state.has_loaded) {
434 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
435 }
436 });
437 ++state.total;
438 }};
439 VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
440 load_graphics);
261 441
262 ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); 442 std::unique_lock lock{state.mutex};
263 const std::size_t size_in_bytes = code.size() * sizeof(u64); 443 callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
444 state.has_loaded = true;
445 lock.unlock();
264 446
265 auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, 447 workers.WaitForRequests();
266 *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); 448}
267 shader = shader_info.get();
268 449
269 if (cpu_addr) { 450GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
270 Register(std::move(shader_info), *cpu_addr, size_in_bytes); 451 const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
271 } else { 452 auto& pipeline{pair->second};
272 null_kernel = std::move(shader_info); 453 if (is_new) {
273 } 454 pipeline = CreateGraphicsPipeline();
274 } 455 }
275 456 if (!pipeline) {
276 const Specialization specialization{ 457 return nullptr;
277 .base_binding = 0, 458 }
278 .workgroup_size = key.workgroup_size, 459 if (current_pipeline) {
279 .shared_memory_size = key.shared_memory_size, 460 current_pipeline->AddTransition(pipeline.get());
280 .point_size = std::nullopt, 461 }
281 .enabled_attributes = {}, 462 current_pipeline = pipeline.get();
282 .attribute_types = {}, 463 return BuiltPipeline(current_pipeline);
283 .ndc_minus_one_to_one = false,
284 };
285 const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
286 shader->GetRegistry(), specialization),
287 shader->GetEntries()};
288 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
289 update_descriptor_queue, spirv_shader);
290 return *entry;
291} 464}
292 465
293void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { 466GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
294 gpu.ShaderNotify().MarkShaderComplete(); 467 if (pipeline->IsBuilt()) {
295 std::unique_lock lock{pipeline_cache}; 468 return pipeline;
296 graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); 469 }
470 if (!use_asynchronous_shaders) {
471 return pipeline;
472 }
473 // If something is using depth, we can assume that games are not rendering anything which
474 // will be used one time.
475 if (maxwell3d.regs.zeta_enable) {
476 return nullptr;
477 }
478 // If games are using a small index count, we can assume these are full screen quads.
479 // Usually these shaders are only used once for building textures so we can assume they
480 // can't be built async
481 if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
482 return pipeline;
483 }
484 return nullptr;
297} 485}
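BuiltPipeline() above decides whether a draw may be skipped while its pipeline finishes building asynchronously: returning the pipeline forces the caller to use (and therefore wait for) it, while returning nullptr drops the draw for this frame. A condensed sketch of that heuristic with hypothetical draw-state fields in place of the Maxwell registers:

struct DrawState {
    bool depth_buffer_enabled = false; // stands in for regs.zeta_enable
    unsigned index_count = 0;
    unsigned vertex_count = 0;
};

// True when skipping the draw is acceptable and the pipeline can keep building in the
// background; false when the caller should wait for the pipeline instead.
bool CanDeferDraw(const DrawState& draw, bool pipeline_built, bool async_enabled) {
    if (pipeline_built || !async_enabled) {
        return false;
    }
    if (draw.depth_buffer_enabled) {
        // Depth usage suggests regular scene geometry that will be drawn again soon,
        // so dropping one frame's draw is unlikely to be noticeable.
        return true;
    }
    // Tiny draws (e.g. full-screen quads that bake a texture) often run exactly once,
    // so deferring them would lose their only invocation.
    if (draw.index_count <= 6 || draw.vertex_count <= 6) {
        return false;
    }
    return true;
}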
298 486
299void VKPipelineCache::OnShaderRemoval(Shader* shader) { 487std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
300 bool finished = false; 488 ShaderPools& pools, const GraphicsPipelineCacheKey& key,
301 const auto Finish = [&] { 489 std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
302 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and 490 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
303 // flush. 491 size_t env_index{0};
304 if (finished) { 492 std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
305 return; 493 const bool uses_vertex_a{key.unique_hashes[0] != 0};
306 } 494 const bool uses_vertex_b{key.unique_hashes[1] != 0};
307 finished = true; 495 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
308 scheduler.Finish(); 496 if (key.unique_hashes[index] == 0) {
309 };
310
311 const GPUVAddr invalidated_addr = shader->GetGpuAddr();
312 for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
313 auto& entry = it->first;
314 if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
315 entry.shaders.end()) {
316 ++it;
317 continue; 497 continue;
318 } 498 }
319 Finish(); 499 Shader::Environment& env{*envs[env_index]};
320 it = graphics_cache.erase(it); 500 ++env_index;
501
502 const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
503 Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
504 if (!uses_vertex_a || index != 1) {
505 // Normal path
506 programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
507 } else {
508 // VertexB path when VertexA is present.
509 auto& program_va{programs[0]};
510 auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
511 programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
512 }
321 } 513 }
322 for (auto it = compute_cache.begin(); it != compute_cache.end();) { 514 std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
323 auto& entry = it->first; 515 std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
324 if (entry.shader != invalidated_addr) { 516
325 ++it; 517 const Shader::IR::Program* previous_stage{};
518 Shader::Backend::Bindings binding;
519 for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
520 ++index) {
521 if (key.unique_hashes[index] == 0) {
326 continue; 522 continue;
327 } 523 }
328 Finish(); 524 UNIMPLEMENTED_IF(index == 0);
329 it = compute_cache.erase(it); 525
526 Shader::IR::Program& program{programs[index]};
527 const size_t stage_index{index - 1};
528 infos[stage_index] = &program.info;
529
530 const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
531 const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
532 device.SaveShader(code);
533 modules[stage_index] = BuildShader(device, code);
534 if (device.HasDebuggingToolAttached()) {
535 const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
536 modules[stage_index].SetObjectNameEXT(name.c_str());
537 }
538 previous_stage = &program;
330 } 539 }
540 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
541 return std::make_unique<GraphicsPipeline>(
542 maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
543 descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key,
544 std::move(modules), infos);
545
546} catch (const Shader::Exception& exception) {
547 LOG_ERROR(Render_Vulkan, "{}", exception.what());
548 return nullptr;
331} 549}
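CreateGraphicsPipeline above is written as a function-try-block, so a shader translation exception anywhere in the body is logged and collapses into a null pipeline instead of propagating to the caller. A minimal sketch of that pattern with illustrative types:

#include <memory>
#include <stdexcept>

struct Pipeline {};

std::unique_ptr<Pipeline> BuildPipeline(bool fail) try {
    if (fail) {
        throw std::runtime_error("translation failed");
    }
    return std::make_unique<Pipeline>();
} catch (const std::exception& exception) {
    // A real implementation would log exception.what(); the caller just sees nullptr.
    static_cast<void>(exception);
    return nullptr;
}

int main() {
    const bool ok = BuildPipeline(false) != nullptr && BuildPipeline(true) == nullptr;
    return ok ? 0 : 1;
}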
332 550
333std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> 551std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
334VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { 552 GraphicsEnvironments environments;
335 Specialization specialization; 553 GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
336 if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
337 float point_size;
338 std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
339 specialization.point_size = point_size;
340 ASSERT(point_size != 0.0f);
341 }
342 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
343 const auto& attribute = fixed_state.attributes[i];
344 specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
345 specialization.attribute_types[i] = attribute.Type();
346 }
347 specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
348 specialization.early_fragment_tests = fixed_state.early_z;
349
350 // Alpha test
351 specialization.alpha_test_func =
352 FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
353 specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
354
355 SPIRVProgram program;
356 std::vector<VkDescriptorSetLayoutBinding> bindings;
357 554
358 for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { 555 main_pools.ReleaseContents();
359 const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); 556 auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
360 // Skip stages that are not enabled 557 if (!pipeline || pipeline_cache_filename.empty()) {
361 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { 558 return pipeline;
362 continue;
363 }
364 const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
365 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
366 Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
367
368 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
369 const ShaderType program_type = GetShaderType(program_enum);
370 const auto& entries = shader->GetEntries();
371 program[stage] = {
372 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
373 entries,
374 };
375
376 const u32 old_binding = specialization.base_binding;
377 specialization.base_binding =
378 FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
379 ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
380 } 559 }
381 return {std::move(program), std::move(bindings)}; 560 serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
382} 561 boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
383 562 env_ptrs;
384template <VkDescriptorType descriptor_type, class Container> 563 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
385void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, 564 if (key.unique_hashes[index] != 0) {
386 u32& offset, const Container& container) { 565 env_ptrs.push_back(&envs[index]);
387 static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); 566 }
388 const u32 count = static_cast<u32>(std::size(container));
389
390 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
391 for (u32 i = 0; i < count; ++i) {
392 const u32 num_samplers = container[i].size;
393 template_entries.push_back({
394 .dstBinding = binding,
395 .dstArrayElement = 0,
396 .descriptorCount = num_samplers,
397 .descriptorType = descriptor_type,
398 .offset = offset,
399 .stride = entry_size,
400 });
401
402 ++binding;
403 offset += num_samplers * entry_size;
404 } 567 }
405 return; 568 SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION);
406 } 569 });
570 return pipeline;
571}
407 572
408 if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || 573std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
409 descriptor_type == STORAGE_TEXEL_BUFFER) { 574 const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
410 // Nvidia has a bug where updating multiple texels at once causes the driver to crash. 575 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
411 // Note: Fixed in driver Windows 443.24, Linux 440.66.15 576 const auto& qmd{kepler_compute.launch_description};
412 for (u32 i = 0; i < count; ++i) { 577 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
413 template_entries.push_back({ 578 env.SetCachedSize(shader->size_bytes);
414 .dstBinding = binding + i, 579
415 .dstArrayElement = 0, 580 main_pools.ReleaseContents();
416 .descriptorCount = 1, 581 auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
417 .descriptorType = descriptor_type, 582 if (!pipeline || pipeline_cache_filename.empty()) {
418 .offset = static_cast<std::size_t>(offset + i * entry_size), 583 return pipeline;
419 .stride = entry_size,
420 });
421 }
422 } else if (count > 0) {
423 template_entries.push_back({
424 .dstBinding = binding,
425 .dstArrayElement = 0,
426 .descriptorCount = count,
427 .descriptorType = descriptor_type,
428 .offset = offset,
429 .stride = entry_size,
430 });
431 } 584 }
432 offset += count * entry_size; 585 serialization_thread.QueueWork([this, key, env = std::move(env)] {
433 binding += count; 586 SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
587 pipeline_cache_filename, CACHE_VERSION);
588 });
589 return pipeline;
434} 590}
435 591
436void FillDescriptorUpdateTemplateEntries( 592std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
437 const ShaderEntries& entries, u32& binding, u32& offset, 593 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
438 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { 594 bool build_in_parallel) try {
439 AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); 595 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
440 AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); 596
441 AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); 597 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
442 AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); 598 auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
443 AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); 599 const std::vector<u32> code{EmitSPIRV(profile, program)};
444 AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); 600 device.SaveShader(code);
601 vk::ShaderModule spv_module{BuildShader(device, code)};
602 if (device.HasDebuggingToolAttached()) {
603 const auto name{fmt::format("Shader {:016x}", key.unique_hash)};
604 spv_module.SetObjectNameEXT(name.c_str());
605 }
606 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
607 return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
608 thread_worker, &shader_notify, program.info,
609 std::move(spv_module));
610
611} catch (const Shader::Exception& exception) {
612 LOG_ERROR(Render_Vulkan, "{}", exception.what());
613 return nullptr;
445} 614}
446 615
447} // namespace Vulkan 616} // namespace Vulkan
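Editor's note on the hunk above: pipeline creation stays on the caller's thread, and only a copy of the cache key plus its shader environments is handed to a dedicated worker, so the disk write in SerializePipeline never blocks rendering. A minimal, self-contained sketch of that queue-and-drain pattern (hypothetical SerializationWorker class, not yuzu's Common::ThreadWorker):

    #include <condition_variable>
    #include <functional>
    #include <mutex>
    #include <queue>
    #include <thread>

    class SerializationWorker {
    public:
        SerializationWorker() : thread([this] { Loop(); }) {}

        ~SerializationWorker() {
            {
                std::scoped_lock lock{mutex};
                stop = true;
            }
            condvar.notify_one();
            thread.join();
        }

        // Called from the render thread; returns immediately.
        void QueueWork(std::function<void()>&& work) {
            {
                std::scoped_lock lock{mutex};
                tasks.push(std::move(work));
            }
            condvar.notify_one();
        }

    private:
        void Loop() {
            for (;;) {
                std::function<void()> task;
                {
                    std::unique_lock lock{mutex};
                    condvar.wait(lock, [this] { return stop || !tasks.empty(); });
                    if (tasks.empty()) {
                        return; // stop requested and nothing left to write
                    }
                    task = std::move(tasks.front());
                    tasks.pop();
                }
                task(); // e.g. a lambda that calls SerializePipeline(key, env_ptrs, filename, CACHE_VERSION)
            }
        }

        std::mutex mutex;
        std::condition_variable condvar;
        std::queue<std::function<void()>> tasks;
        bool stop = false;
        std::thread thread; // declared last so it starts after the other members exist
    };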
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 89d635a3d..efe5a7ed8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,24 +6,28 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <filesystem>
10#include <iosfwd>
9#include <memory> 11#include <memory>
10#include <type_traits> 12#include <type_traits>
11#include <unordered_map> 13#include <unordered_map>
12#include <utility> 14#include <utility>
13#include <vector> 15#include <vector>
14 16
15#include <boost/functional/hash.hpp>
16
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "video_core/engines/const_buffer_engine_interface.h" 18#include "common/thread_worker.h"
19#include "shader_recompiler/frontend/ir/basic_block.h"
20#include "shader_recompiler/frontend/ir/value.h"
21#include "shader_recompiler/frontend/maxwell/control_flow.h"
22#include "shader_recompiler/host_translate_info.h"
23#include "shader_recompiler/object_pool.h"
24#include "shader_recompiler/profile.h"
19#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 26#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
27#include "video_core/renderer_vulkan/vk_buffer_cache.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 29#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
22#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 30#include "video_core/renderer_vulkan/vk_texture_cache.h"
23#include "video_core/shader/async_shaders.h"
24#include "video_core/shader/memory_util.h"
25#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h"
27#include "video_core/shader_cache.h" 31#include "video_core/shader_cache.h"
28#include "video_core/vulkan_common/vulkan_wrapper.h" 32#include "video_core/vulkan_common/vulkan_wrapper.h"
29 33
@@ -31,23 +35,24 @@ namespace Core {
31class System; 35class System;
32} 36}
33 37
34namespace Vulkan { 38namespace Shader::IR {
39struct Program;
40}
35 41
36class Device; 42namespace VideoCore {
37class RasterizerVulkan; 43class ShaderNotify;
38class VKComputePipeline; 44}
39class VKDescriptorPool; 45
40class VKScheduler; 46namespace Vulkan {
41class VKUpdateDescriptorQueue;
42 47
43using Maxwell = Tegra::Engines::Maxwell3D::Regs; 48using Maxwell = Tegra::Engines::Maxwell3D::Regs;
44 49
45struct ComputePipelineCacheKey { 50struct ComputePipelineCacheKey {
46 GPUVAddr shader; 51 u64 unique_hash;
47 u32 shared_memory_size; 52 u32 shared_memory_size;
48 std::array<u32, 3> workgroup_size; 53 std::array<u32, 3> workgroup_size;
49 54
50 std::size_t Hash() const noexcept; 55 size_t Hash() const noexcept;
51 56
52 bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; 57 bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
53 58
@@ -64,15 +69,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
64namespace std { 69namespace std {
65 70
66template <> 71template <>
67struct hash<Vulkan::GraphicsPipelineCacheKey> {
68 std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
69 return k.Hash();
70 }
71};
72
73template <>
74struct hash<Vulkan::ComputePipelineCacheKey> { 72struct hash<Vulkan::ComputePipelineCacheKey> {
75 std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { 73 size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
76 return k.Hash(); 74 return k.Hash();
77 } 75 }
78}; 76};
@@ -81,94 +79,90 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
81 79
82namespace Vulkan { 80namespace Vulkan {
83 81
84class Shader { 82class ComputePipeline;
85public: 83class Device;
86 explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, 84class DescriptorPool;
87 Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, 85class RasterizerVulkan;
88 VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); 86class RenderPassCache;
89 ~Shader(); 87class VKScheduler;
90 88class VKUpdateDescriptorQueue;
91 GPUVAddr GetGpuAddr() const {
92 return gpu_addr;
93 }
94
95 VideoCommon::Shader::ShaderIR& GetIR() {
96 return shader_ir;
97 }
98
99 const VideoCommon::Shader::ShaderIR& GetIR() const {
100 return shader_ir;
101 }
102 89
103 const VideoCommon::Shader::Registry& GetRegistry() const { 90using VideoCommon::ShaderInfo;
104 return registry;
105 }
106 91
107 const ShaderEntries& GetEntries() const { 92struct ShaderPools {
108 return entries; 93 void ReleaseContents() {
94 flow_block.ReleaseContents();
95 block.ReleaseContents();
96 inst.ReleaseContents();
109 } 97 }
110 98
111private: 99 Shader::ObjectPool<Shader::IR::Inst> inst;
112 GPUVAddr gpu_addr{}; 100 Shader::ObjectPool<Shader::IR::Block> block;
113 VideoCommon::Shader::ProgramCode program_code; 101 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
114 VideoCommon::Shader::Registry registry;
115 VideoCommon::Shader::ShaderIR shader_ir;
116 ShaderEntries entries;
117}; 102};
118 103
119class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { 104class PipelineCache : public VideoCommon::ShaderCache {
120public: 105public:
121 explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, 106 explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
122 Tegra::Engines::Maxwell3D& maxwell3d, 107 Tegra::Engines::KeplerCompute& kepler_compute,
123 Tegra::Engines::KeplerCompute& kepler_compute, 108 Tegra::MemoryManager& gpu_memory, const Device& device,
124 Tegra::MemoryManager& gpu_memory, const Device& device, 109 VKScheduler& scheduler, DescriptorPool& descriptor_pool,
125 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, 110 VKUpdateDescriptorQueue& update_descriptor_queue,
126 VKUpdateDescriptorQueue& update_descriptor_queue); 111 RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
127 ~VKPipelineCache() override; 112 TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
113 ~PipelineCache();
114
115 [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
128 116
129 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); 117 [[nodiscard]] ComputePipeline* CurrentComputePipeline();
130 118
131 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, 119 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
132 u32 num_color_buffers, 120 const VideoCore::DiskResourceLoadCallback& callback);
133 VideoCommon::Shader::AsyncShaders& async_shaders);
134 121
135 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 122private:
123 [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
136 124
137 void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); 125 [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
138 126
139protected: 127 std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
140 void OnShaderRemoval(Shader* shader) final;
141 128
142private: 129 std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
143 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( 130 ShaderPools& pools, const GraphicsPipelineCacheKey& key,
144 const FixedPipelineState& fixed_state); 131 std::span<Shader::Environment* const> envs, bool build_in_parallel);
145 132
146 Tegra::GPU& gpu; 133 std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
147 Tegra::Engines::Maxwell3D& maxwell3d; 134 const ShaderInfo* shader);
148 Tegra::Engines::KeplerCompute& kepler_compute; 135
149 Tegra::MemoryManager& gpu_memory; 136 std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
137 const ComputePipelineCacheKey& key,
138 Shader::Environment& env,
139 bool build_in_parallel);
150 140
151 const Device& device; 141 const Device& device;
152 VKScheduler& scheduler; 142 VKScheduler& scheduler;
153 VKDescriptorPool& descriptor_pool; 143 DescriptorPool& descriptor_pool;
154 VKUpdateDescriptorQueue& update_descriptor_queue; 144 VKUpdateDescriptorQueue& update_descriptor_queue;
145 RenderPassCache& render_pass_cache;
146 BufferCache& buffer_cache;
147 TextureCache& texture_cache;
148 VideoCore::ShaderNotify& shader_notify;
149 bool use_asynchronous_shaders{};
155 150
156 std::unique_ptr<Shader> null_shader; 151 GraphicsPipelineCacheKey graphics_key{};
157 std::unique_ptr<Shader> null_kernel; 152 GraphicsPipeline* current_pipeline{};
158 153
159 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; 154 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
155 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
160 156
161 GraphicsPipelineCacheKey last_graphics_key; 157 ShaderPools main_pools;
162 VKGraphicsPipeline* last_graphics_pipeline = nullptr;
163 158
164 std::mutex pipeline_cache; 159 Shader::Profile profile;
165 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> 160 Shader::HostTranslateInfo host_info;
166 graphics_cache;
167 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
168};
169 161
170void FillDescriptorUpdateTemplateEntries( 162 std::filesystem::path pipeline_cache_filename;
171 const ShaderEntries& entries, u32& binding, u32& offset, 163
172 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); 164 Common::ThreadWorker workers;
165 Common::ThreadWorker serialization_thread;
166};
173 167
174} // namespace Vulkan 168} // namespace Vulkan
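Editor's note: the ShaderPools member introduced above exists so that IR instructions, blocks and flow blocks from one translation can be discarded in bulk (ReleaseContents) before the next pipeline is built, instead of freeing nodes one by one. A rough stand-in for Shader::ObjectPool, showing only the bulk-release interface (the real pool recycles memory chunks rather than reallocating):

    #include <memory>
    #include <utility>
    #include <vector>

    template <typename T>
    class BulkPool {
    public:
        template <typename... Args>
        T* Create(Args&&... args) {
            storage.push_back(std::make_unique<T>(std::forward<Args>(args)...));
            return storage.back().get();
        }

        // Destroys every object created so far; the vector keeps its capacity for reuse.
        void ReleaseContents() {
            storage.clear();
        }

    private:
        std::vector<std::unique_ptr<T>> storage;
    };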
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 7cadd5147..c9cb32d71 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -114,14 +114,10 @@ void HostCounter::EndQuery() {
114} 114}
115 115
116u64 HostCounter::BlockingQuery() const { 116u64 HostCounter::BlockingQuery() const {
117 if (tick >= cache.GetScheduler().CurrentTick()) { 117 cache.GetScheduler().Wait(tick);
118 cache.GetScheduler().Flush();
119 }
120
121 u64 data; 118 u64 data;
122 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( 119 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
123 query.first, query.second, 1, sizeof(data), &data, sizeof(data), 120 query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT);
124 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
125 121
126 switch (query_result) { 122 switch (query_result) {
127 case VK_SUCCESS: 123 case VK_SUCCESS:
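Editor's note: with the scheduler change above, BlockingQuery no longer needs to flush manually or pass VK_QUERY_RESULT_WAIT_BIT, because waiting on the tick's fence already guarantees the command buffer containing the query has finished. A plain-Vulkan sketch of the read that follows such a wait (raw API calls, not yuzu's vk:: wrapper):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    uint64_t ReadCounter(VkDevice device, VkQueryPool pool, uint32_t query) {
        uint64_t data = 0;
        // After a host-side fence wait covering vkCmdEndQuery, the result is already
        // available, so no WAIT bit is requested and VK_NOT_READY is not expected here.
        vkGetQueryPoolResults(device, pool, query, 1, sizeof(data), &data, sizeof(data),
                              VK_QUERY_RESULT_64_BIT);
        return data;
    }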
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f57c15b37..c7a07fdd8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -24,7 +24,6 @@
24#include "video_core/renderer_vulkan/vk_buffer_cache.h" 24#include "video_core/renderer_vulkan/vk_buffer_cache.h"
25#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 25#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
26#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 26#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
27#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
28#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 27#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
29#include "video_core/renderer_vulkan/vk_rasterizer.h" 28#include "video_core/renderer_vulkan/vk_rasterizer.h"
30#include "video_core/renderer_vulkan/vk_scheduler.h" 29#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -55,11 +54,10 @@ struct DrawParams {
55 u32 num_instances; 54 u32 num_instances;
56 u32 base_vertex; 55 u32 base_vertex;
57 u32 num_vertices; 56 u32 num_vertices;
57 u32 first_index;
58 bool is_indexed; 58 bool is_indexed;
59}; 59};
60 60
61constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
62
63VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { 61VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
64 const auto& src = regs.viewport_transform[index]; 62 const auto& src = regs.viewport_transform[index];
65 const float width = src.scale_x * 2.0f; 63 const float width = src.scale_x * 2.0f;
@@ -97,118 +95,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
97 return scissor; 95 return scissor;
98} 96}
99 97
100std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
101 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
102 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
103 for (size_t i = 0; i < std::size(addresses); ++i) {
104 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
105 }
106 return addresses;
107}
108
109struct TextureHandle {
110 constexpr TextureHandle(u32 data, bool via_header_index) {
111 const Tegra::Texture::TextureHandle handle{data};
112 image = handle.tic_id;
113 sampler = via_header_index ? image : handle.tsc_id.Value();
114 }
115
116 u32 image;
117 u32 sampler;
118};
119
120template <typename Engine, typename Entry>
121TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
122 size_t stage, size_t index = 0) {
123 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
124 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
125 if (entry.is_separated) {
126 const u32 buffer_1 = entry.buffer;
127 const u32 buffer_2 = entry.secondary_buffer;
128 const u32 offset_1 = entry.offset;
129 const u32 offset_2 = entry.secondary_offset;
130 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
131 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
132 return TextureHandle(handle_1 | handle_2, via_header_index);
133 }
134 }
135 if (entry.is_bindless) {
136 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
137 return TextureHandle(raw, via_header_index);
138 }
139 const u32 buffer = engine.GetBoundBuffer();
140 const u64 offset = (entry.offset + index) * sizeof(u32);
141 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
142}
143
144ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
145 if (entry.is_buffer) {
146 return ImageViewType::e2D;
147 }
148 switch (entry.type) {
149 case Tegra::Shader::TextureType::Texture1D:
150 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
151 case Tegra::Shader::TextureType::Texture2D:
152 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
153 case Tegra::Shader::TextureType::Texture3D:
154 return ImageViewType::e3D;
155 case Tegra::Shader::TextureType::TextureCube:
156 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
157 }
158 UNREACHABLE();
159 return ImageViewType::e2D;
160}
161
162ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
163 switch (entry.type) {
164 case Tegra::Shader::ImageType::Texture1D:
165 return ImageViewType::e1D;
166 case Tegra::Shader::ImageType::Texture1DArray:
167 return ImageViewType::e1DArray;
168 case Tegra::Shader::ImageType::Texture2D:
169 return ImageViewType::e2D;
170 case Tegra::Shader::ImageType::Texture2DArray:
171 return ImageViewType::e2DArray;
172 case Tegra::Shader::ImageType::Texture3D:
173 return ImageViewType::e3D;
174 case Tegra::Shader::ImageType::TextureBuffer:
175 return ImageViewType::Buffer;
176 }
177 UNREACHABLE();
178 return ImageViewType::e2D;
179}
180
181void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
182 VKUpdateDescriptorQueue& update_descriptor_queue,
183 ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
184 for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
185 const ImageViewId image_view_id = *image_view_id_ptr++;
186 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
187 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
188 }
189 for (const auto& entry : entries.samplers) {
190 for (size_t i = 0; i < entry.size; ++i) {
191 const VkSampler sampler = *sampler_ptr++;
192 const ImageViewId image_view_id = *image_view_id_ptr++;
193 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
194 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
195 update_descriptor_queue.AddSampledImage(handle, sampler);
196 }
197 }
198 for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
199 const ImageViewId image_view_id = *image_view_id_ptr++;
200 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
201 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
202 }
203 for (const auto& entry : entries.images) {
204 // TODO: Mark as modified
205 const ImageViewId image_view_id = *image_view_id_ptr++;
206 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
207 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
208 update_descriptor_queue.AddImage(handle);
209 }
210}
211
212DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, 98DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
213 bool is_indexed) { 99 bool is_indexed) {
214 DrawParams params{ 100 DrawParams params{
@@ -216,6 +102,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
216 .num_instances = is_instanced ? num_instances : 1, 102 .num_instances = is_instanced ? num_instances : 1,
217 .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, 103 .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
218 .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, 104 .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
105 .first_index = is_indexed ? regs.index_array.first : 0,
219 .is_indexed = is_indexed, 106 .is_indexed = is_indexed,
220 }; 107 };
221 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { 108 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
@@ -243,21 +130,21 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
243 blit_image(device, scheduler, state_tracker, descriptor_pool), 130 blit_image(device, scheduler, state_tracker, descriptor_pool),
244 astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, 131 astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
245 memory_allocator), 132 memory_allocator),
246 texture_cache_runtime{device, scheduler, memory_allocator, 133 render_pass_cache(device), texture_cache_runtime{device, scheduler,
247 staging_pool, blit_image, astc_decoder_pass}, 134 memory_allocator, staging_pool,
135 blit_image, astc_decoder_pass,
136 render_pass_cache},
248 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 137 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
249 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, 138 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
250 update_descriptor_queue, descriptor_pool), 139 update_descriptor_queue, descriptor_pool),
251 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 140 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
252 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 141 pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
253 descriptor_pool, update_descriptor_queue), 142 descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
143 texture_cache, gpu.ShaderNotify()),
254 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, 144 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
255 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), 145 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
256 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 146 wfi_event(device.GetLogical().CreateEvent()) {
257 scheduler.SetQueryCache(query_cache); 147 scheduler.SetQueryCache(query_cache);
258 if (device.UseAsynchronousShaders()) {
259 async_shaders.AllocateWorkers();
260 }
261} 148}
262 149
263RasterizerVulkan::~RasterizerVulkan() = default; 150RasterizerVulkan::~RasterizerVulkan() = default;
@@ -270,53 +157,30 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
270 157
271 query_cache.UpdateCounters(); 158 query_cache.UpdateCounters();
272 159
273 graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); 160 GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
274 161 if (!pipeline) {
275 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
276
277 texture_cache.SynchronizeGraphicsDescriptors();
278 texture_cache.UpdateRenderTargets(false);
279
280 const auto shaders = pipeline_cache.GetShaders();
281 graphics_key.shaders = GetShaderAddresses(shaders);
282
283 SetupShaderDescriptors(shaders, is_indexed);
284
285 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
286 graphics_key.renderpass = framebuffer->RenderPass();
287
288 VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
289 graphics_key, framebuffer->NumColorBuffers(), async_shaders);
290 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
291 // Async graphics pipeline was not ready.
292 return; 162 return;
293 } 163 }
164 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
165 pipeline->Configure(is_indexed);
294 166
295 BeginTransformFeedback(); 167 BeginTransformFeedback();
296 168
297 scheduler.RequestRenderpass(framebuffer);
298 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
299 UpdateDynamicStates(); 169 UpdateDynamicStates();
300 170
301 const auto& regs = maxwell3d.regs; 171 const auto& regs{maxwell3d.regs};
302 const u32 num_instances = maxwell3d.mme_draw.instance_count; 172 const u32 num_instances{maxwell3d.mme_draw.instance_count};
303 const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); 173 const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
304 const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); 174 scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
305 const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
306 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
307 if (descriptor_set) {
308 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
309 DESCRIPTOR_SET, descriptor_set, nullptr);
310 }
311 if (draw_params.is_indexed) { 175 if (draw_params.is_indexed) {
312 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, 176 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
313 draw_params.base_vertex, draw_params.base_instance); 177 draw_params.first_index, draw_params.base_vertex,
178 draw_params.base_instance);
314 } else { 179 } else {
315 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, 180 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
316 draw_params.base_vertex, draw_params.base_instance); 181 draw_params.base_vertex, draw_params.base_instance);
317 } 182 }
318 }); 183 });
319
320 EndTransformFeedback(); 184 EndTransformFeedback();
321} 185}
322 186
@@ -326,6 +190,7 @@ void RasterizerVulkan::Clear() {
326 if (!maxwell3d.ShouldExecute()) { 190 if (!maxwell3d.ShouldExecute()) {
327 return; 191 return;
328 } 192 }
193 FlushWork();
329 194
330 query_cache.UpdateCounters(); 195 query_cache.UpdateCounters();
331 196
@@ -395,73 +260,20 @@ void RasterizerVulkan::Clear() {
395 }); 260 });
396} 261}
397 262
398void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 263void RasterizerVulkan::DispatchCompute() {
399 MICROPROFILE_SCOPE(Vulkan_Compute); 264 FlushWork();
400
401 query_cache.UpdateCounters();
402 265
403 const auto& launch_desc = kepler_compute.launch_description; 266 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
404 auto& pipeline = pipeline_cache.GetComputePipeline({ 267 if (!pipeline) {
405 .shader = code_addr, 268 return;
406 .shared_memory_size = launch_desc.shared_alloc, 269 }
407 .workgroup_size{ 270 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
408 launch_desc.block_dim_x, 271 pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
409 launch_desc.block_dim_y,
410 launch_desc.block_dim_z,
411 },
412 });
413 272
414 // Compute dispatches can't be executed inside a renderpass 273 const auto& qmd{kepler_compute.launch_description};
274 const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
415 scheduler.RequestOutsideRenderPassOperationContext(); 275 scheduler.RequestOutsideRenderPassOperationContext();
416 276 scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
417 image_view_indices.clear();
418 sampler_handles.clear();
419
420 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
421
422 const auto& entries = pipeline.GetEntries();
423 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
424 buffer_cache.UnbindComputeStorageBuffers();
425 u32 ssbo_index = 0;
426 for (const auto& buffer : entries.global_buffers) {
427 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
428 buffer.is_written);
429 ++ssbo_index;
430 }
431 buffer_cache.UpdateComputeBuffers();
432
433 texture_cache.SynchronizeComputeDescriptors();
434
435 SetupComputeUniformTexels(entries);
436 SetupComputeTextures(entries);
437 SetupComputeStorageTexels(entries);
438 SetupComputeImages(entries);
439
440 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
441 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
442
443 update_descriptor_queue.Acquire();
444
445 buffer_cache.BindHostComputeBuffers();
446
447 ImageViewId* image_view_id_ptr = image_view_ids.data();
448 VkSampler* sampler_ptr = sampler_handles.data();
449 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
450 sampler_ptr);
451
452 const VkPipeline pipeline_handle = pipeline.GetHandle();
453 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
454 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
455 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
456 grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
457 descriptor_set](vk::CommandBuffer cmdbuf) {
458 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
459 if (descriptor_set) {
460 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
461 DESCRIPTOR_SET, descriptor_set, nullptr);
462 }
463 cmdbuf.Dispatch(grid_x, grid_y, grid_z);
464 });
465} 277}
466 278
467void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { 279void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
@@ -626,6 +438,7 @@ void RasterizerVulkan::WaitForIdle() {
626 438
627void RasterizerVulkan::FragmentBarrier() { 439void RasterizerVulkan::FragmentBarrier() {
628 // We already put barriers when a render pass finishes 440 // We already put barriers when a render pass finishes
441 scheduler.RequestOutsideRenderPassOperationContext();
629} 442}
630 443
631void RasterizerVulkan::TiledCacheBarrier() { 444void RasterizerVulkan::TiledCacheBarrier() {
@@ -633,10 +446,11 @@ void RasterizerVulkan::TiledCacheBarrier() {
633} 446}
634 447
635void RasterizerVulkan::FlushCommands() { 448void RasterizerVulkan::FlushCommands() {
636 if (draw_counter > 0) { 449 if (draw_counter == 0) {
637 draw_counter = 0; 450 return;
638 scheduler.Flush();
639 } 451 }
452 draw_counter = 0;
453 scheduler.Flush();
640} 454}
641 455
642void RasterizerVulkan::TickFrame() { 456void RasterizerVulkan::TickFrame() {
@@ -676,13 +490,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
676 if (!image_view) { 490 if (!image_view) {
677 return false; 491 return false;
678 } 492 }
679 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); 493 screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D);
680 screen_info.width = image_view->size.width; 494 screen_info.width = image_view->size.width;
681 screen_info.height = image_view->size.height; 495 screen_info.height = image_view->size.height;
682 screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); 496 screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
683 return true; 497 return true;
684} 498}
685 499
500void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
501 const VideoCore::DiskResourceLoadCallback& callback) {
502 pipeline_cache.LoadDiskResources(title_id, stop_loading, callback);
503}
504
686void RasterizerVulkan::FlushWork() { 505void RasterizerVulkan::FlushWork() {
687 static constexpr u32 DRAWS_TO_DISPATCH = 4096; 506 static constexpr u32 DRAWS_TO_DISPATCH = 4096;
688 507
@@ -691,13 +510,11 @@ void RasterizerVulkan::FlushWork() {
691 if ((++draw_counter & 7) != 7) { 510 if ((++draw_counter & 7) != 7) {
692 return; 511 return;
693 } 512 }
694
695 if (draw_counter < DRAWS_TO_DISPATCH) { 513 if (draw_counter < DRAWS_TO_DISPATCH) {
696 // Send recorded tasks to the worker thread 514 // Send recorded tasks to the worker thread
697 scheduler.DispatchWork(); 515 scheduler.DispatchWork();
698 return; 516 return;
699 } 517 }
700
701 // Otherwise (every certain number of draws) flush execution. 518 // Otherwise (every certain number of draws) flush execution.
702 // This submits commands to the Vulkan driver. 519 // This submits commands to the Vulkan driver.
703 scheduler.Flush(); 520 scheduler.Flush();
@@ -716,52 +533,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
716 return buffer_cache.DMACopy(src_address, dest_address, amount); 533 return buffer_cache.DMACopy(src_address, dest_address, amount);
717} 534}
718 535
719void RasterizerVulkan::SetupShaderDescriptors(
720 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
721 image_view_indices.clear();
722 sampler_handles.clear();
723 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
724 Shader* const shader = shaders[stage + 1];
725 if (!shader) {
726 continue;
727 }
728 const ShaderEntries& entries = shader->GetEntries();
729 SetupGraphicsUniformTexels(entries, stage);
730 SetupGraphicsTextures(entries, stage);
731 SetupGraphicsStorageTexels(entries, stage);
732 SetupGraphicsImages(entries, stage);
733
734 buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
735 buffer_cache.UnbindGraphicsStorageBuffers(stage);
736 u32 ssbo_index = 0;
737 for (const auto& buffer : entries.global_buffers) {
738 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
739 buffer.cbuf_offset, buffer.is_written);
740 ++ssbo_index;
741 }
742 }
743 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
744 buffer_cache.UpdateGraphicsBuffers(is_indexed);
745 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
746
747 buffer_cache.BindHostGeometryBuffers(is_indexed);
748
749 update_descriptor_queue.Acquire();
750
751 ImageViewId* image_view_id_ptr = image_view_ids.data();
752 VkSampler* sampler_ptr = sampler_handles.data();
753 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
754 // Skip VertexA stage
755 Shader* const shader = shaders[stage + 1];
756 if (!shader) {
757 continue;
758 }
759 buffer_cache.BindHostStageBuffers(stage);
760 PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
761 image_view_id_ptr, sampler_ptr);
762 }
763}
764
765void RasterizerVulkan::UpdateDynamicStates() { 536void RasterizerVulkan::UpdateDynamicStates() {
766 auto& regs = maxwell3d.regs; 537 auto& regs = maxwell3d.regs;
767 UpdateViewportsState(regs); 538 UpdateViewportsState(regs);
@@ -770,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
770 UpdateBlendConstants(regs); 541 UpdateBlendConstants(regs);
771 UpdateDepthBounds(regs); 542 UpdateDepthBounds(regs);
772 UpdateStencilFaces(regs); 543 UpdateStencilFaces(regs);
544 UpdateLineWidth(regs);
773 if (device.IsExtExtendedDynamicStateSupported()) { 545 if (device.IsExtExtendedDynamicStateSupported()) {
774 UpdateCullMode(regs); 546 UpdateCullMode(regs);
775 UpdateDepthBoundsTestEnable(regs); 547 UpdateDepthBoundsTestEnable(regs);
@@ -779,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() {
779 UpdateFrontFace(regs); 551 UpdateFrontFace(regs);
780 UpdateStencilOp(regs); 552 UpdateStencilOp(regs);
781 UpdateStencilTestEnable(regs); 553 UpdateStencilTestEnable(regs);
554 if (device.IsExtVertexInputDynamicStateSupported()) {
555 UpdateVertexInput(regs);
556 }
782 } 557 }
783} 558}
784 559
@@ -810,89 +585,6 @@ void RasterizerVulkan::EndTransformFeedback() {
810 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 585 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
811} 586}
812 587
813void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
814 const auto& regs = maxwell3d.regs;
815 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
816 for (const auto& entry : entries.uniform_texels) {
817 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
818 image_view_indices.push_back(handle.image);
819 }
820}
821
822void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
823 const auto& regs = maxwell3d.regs;
824 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
825 for (const auto& entry : entries.samplers) {
826 for (size_t index = 0; index < entry.size; ++index) {
827 const TextureHandle handle =
828 GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
829 image_view_indices.push_back(handle.image);
830
831 Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
832 sampler_handles.push_back(sampler->Handle());
833 }
834 }
835}
836
837void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
838 const auto& regs = maxwell3d.regs;
839 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
840 for (const auto& entry : entries.storage_texels) {
841 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
842 image_view_indices.push_back(handle.image);
843 }
844}
845
846void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
847 const auto& regs = maxwell3d.regs;
848 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
849 for (const auto& entry : entries.images) {
850 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
851 image_view_indices.push_back(handle.image);
852 }
853}
854
855void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
856 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
857 for (const auto& entry : entries.uniform_texels) {
858 const TextureHandle handle =
859 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
860 image_view_indices.push_back(handle.image);
861 }
862}
863
864void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
865 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
866 for (const auto& entry : entries.samplers) {
867 for (size_t index = 0; index < entry.size; ++index) {
868 const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
869 COMPUTE_SHADER_INDEX, index);
870 image_view_indices.push_back(handle.image);
871
872 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
873 sampler_handles.push_back(sampler->Handle());
874 }
875 }
876}
877
878void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
879 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
880 for (const auto& entry : entries.storage_texels) {
881 const TextureHandle handle =
882 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
883 image_view_indices.push_back(handle.image);
884 }
885}
886
887void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
888 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
889 for (const auto& entry : entries.images) {
890 const TextureHandle handle =
891 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
892 image_view_indices.push_back(handle.image);
893 }
894}
895
896void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 588void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
897 if (!state_tracker.TouchViewports()) { 589 if (!state_tracker.TouchViewports()) {
898 return; 590 return;
@@ -985,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
985 } 677 }
986} 678}
987 679
680void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) {
681 if (!state_tracker.TouchLineWidth()) {
682 return;
683 }
684 const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased;
685 scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); });
686}
687
988void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { 688void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
989 if (!state_tracker.TouchCullMode()) { 689 if (!state_tracker.TouchCullMode()) {
990 return; 690 return;
@@ -999,6 +699,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re
999 if (!state_tracker.TouchDepthBoundsTestEnable()) { 699 if (!state_tracker.TouchDepthBoundsTestEnable()) {
1000 return; 700 return;
1001 } 701 }
702 bool enabled = regs.depth_bounds_enable;
703 if (enabled && !device.IsDepthBoundsSupported()) {
704 LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
705 enabled = false;
706 }
1002 scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { 707 scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
1003 cmdbuf.SetDepthBoundsTestEnableEXT(enable); 708 cmdbuf.SetDepthBoundsTestEnableEXT(enable);
1004 }); 709 });
@@ -1086,4 +791,62 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1086 }); 791 });
1087} 792}
1088 793
794void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
795 auto& dirty{maxwell3d.dirty.flags};
796 if (!dirty[Dirty::VertexInput]) {
797 return;
798 }
799 dirty[Dirty::VertexInput] = false;
800
801 boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings;
802 boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes;
803
804 // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up
805 // generating dirty state. Track the highest dirty attribute and update all attributes until
806 // that one.
807 size_t highest_dirty_attr{};
808 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
809 if (dirty[Dirty::VertexAttribute0 + index]) {
810 highest_dirty_attr = index;
811 }
812 }
813 for (size_t index = 0; index < highest_dirty_attr; ++index) {
814 const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]};
815 const u32 binding{attribute.buffer};
816 dirty[Dirty::VertexAttribute0 + index] = false;
817 dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true;
818 if (!attribute.constant) {
819 attributes.push_back({
820 .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
821 .pNext = nullptr,
822 .location = static_cast<u32>(index),
823 .binding = binding,
824 .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size),
825 .offset = attribute.offset,
826 });
827 }
828 }
829 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
830 if (!dirty[Dirty::VertexBinding0 + index]) {
831 continue;
832 }
833 dirty[Dirty::VertexBinding0 + index] = false;
834
835 const u32 binding{static_cast<u32>(index)};
836 const auto& input_binding{regs.vertex_array[binding]};
837 const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)};
838 bindings.push_back({
839 .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
840 .pNext = nullptr,
841 .binding = binding,
842 .stride = input_binding.stride,
843 .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX,
844 .divisor = is_instanced ? input_binding.divisor : 1,
845 });
846 }
847 scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) {
848 cmdbuf.SetVertexInputEXT(bindings, attributes);
849 });
850}
851
1089} // namespace Vulkan 852} // namespace Vulkan
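Editor's note: UpdateVertexInput above works around the driver quirk described in its comment by finding the highest attribute whose dirty bit is set and rebuilding the attributes below it in one go. A toy, self-contained version of that scan (illustrative only; it does not mirror the exact loop bounds or dirty-flag types of the code above):

    #include <bitset>
    #include <cstddef>
    #include <cstdio>

    constexpr std::size_t NumVertexAttributes = 32;

    int main() {
        std::bitset<NumVertexAttributes> dirty;
        dirty[2] = true;
        dirty[5] = true;

        // Find the highest dirty attribute...
        std::size_t highest = 0;
        for (std::size_t index = 0; index < NumVertexAttributes; ++index) {
            if (dirty[index]) {
                highest = index;
            }
        }
        // ...then rebuild every attribute from 0 up to it, even the clean ones.
        for (std::size_t index = 0; index <= highest; ++index) {
            std::printf("rebuilding attribute %zu\n", index);
            dirty[index] = false;
        }
    }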
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 2065209be..866827247 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,14 +21,13 @@
21#include "video_core/renderer_vulkan/vk_buffer_cache.h" 21#include "video_core/renderer_vulkan/vk_buffer_cache.h"
22#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 22#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
23#include "video_core/renderer_vulkan/vk_fence_manager.h" 23#include "video_core/renderer_vulkan/vk_fence_manager.h"
24#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
25#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 24#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
26#include "video_core/renderer_vulkan/vk_query_cache.h" 25#include "video_core/renderer_vulkan/vk_query_cache.h"
26#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
27#include "video_core/renderer_vulkan/vk_scheduler.h" 27#include "video_core/renderer_vulkan/vk_scheduler.h"
28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
29#include "video_core/renderer_vulkan/vk_texture_cache.h" 29#include "video_core/renderer_vulkan/vk_texture_cache.h"
30#include "video_core/renderer_vulkan/vk_update_descriptor.h" 30#include "video_core/renderer_vulkan/vk_update_descriptor.h"
31#include "video_core/shader/async_shaders.h"
32#include "video_core/vulkan_common/vulkan_memory_allocator.h" 31#include "video_core/vulkan_common/vulkan_memory_allocator.h"
33#include "video_core/vulkan_common/vulkan_wrapper.h" 32#include "video_core/vulkan_common/vulkan_wrapper.h"
34 33
@@ -73,7 +72,7 @@ public:
73 72
74 void Draw(bool is_indexed, bool is_instanced) override; 73 void Draw(bool is_indexed, bool is_instanced) override;
75 void Clear() override; 74 void Clear() override;
76 void DispatchCompute(GPUVAddr code_addr) override; 75 void DispatchCompute() override;
77 void ResetCounter(VideoCore::QueryType type) override; 76 void ResetCounter(VideoCore::QueryType type) override;
78 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 77 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
79 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 78 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -102,19 +101,8 @@ public:
102 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; 101 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
103 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 102 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
104 u32 pixel_stride) override; 103 u32 pixel_stride) override;
105 104 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
106 VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { 105 const VideoCore::DiskResourceLoadCallback& callback) override;
107 return async_shaders;
108 }
109
110 const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
111 return async_shaders;
112 }
113
114 /// Maximum supported size that a constbuffer can have in bytes.
115 static constexpr size_t MaxConstbufferSize = 0x10000;
116 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
117 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
118 106
119private: 107private:
120 static constexpr size_t MAX_TEXTURES = 192; 108 static constexpr size_t MAX_TEXTURES = 192;
@@ -125,46 +113,19 @@ private:
125 113
126 void FlushWork(); 114 void FlushWork();
127 115
128 /// Setup descriptors in the graphics pipeline.
129 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
130 bool is_indexed);
131
132 void UpdateDynamicStates(); 116 void UpdateDynamicStates();
133 117
134 void BeginTransformFeedback(); 118 void BeginTransformFeedback();
135 119
136 void EndTransformFeedback(); 120 void EndTransformFeedback();
137 121
138 /// Setup uniform texels in the graphics pipeline.
139 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
140
141 /// Setup textures in the graphics pipeline.
142 void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
143
144 /// Setup storage texels in the graphics pipeline.
145 void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
146
147 /// Setup images in the graphics pipeline.
148 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
149
150 /// Setup texel buffers in the compute pipeline.
151 void SetupComputeUniformTexels(const ShaderEntries& entries);
152
153 /// Setup textures in the compute pipeline.
154 void SetupComputeTextures(const ShaderEntries& entries);
155
156 /// Setup storage texels in the compute pipeline.
157 void SetupComputeStorageTexels(const ShaderEntries& entries);
158
159 /// Setup images in the compute pipeline.
160 void SetupComputeImages(const ShaderEntries& entries);
161
162 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 122 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
163 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 123 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
164 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 124 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
165 void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); 125 void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
166 void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); 126 void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
167 void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); 127 void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
128 void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
168 129
169 void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); 130 void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
170 void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 131 void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -175,6 +136,8 @@ private:
175 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 136 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
176 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 137 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
177 138
139 void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
140
178 Tegra::GPU& gpu; 141 Tegra::GPU& gpu;
179 Tegra::MemoryManager& gpu_memory; 142 Tegra::MemoryManager& gpu_memory;
180 Tegra::Engines::Maxwell3D& maxwell3d; 143 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -187,24 +150,22 @@ private:
187 VKScheduler& scheduler; 150 VKScheduler& scheduler;
188 151
189 StagingBufferPool staging_pool; 152 StagingBufferPool staging_pool;
190 VKDescriptorPool descriptor_pool; 153 DescriptorPool descriptor_pool;
191 VKUpdateDescriptorQueue update_descriptor_queue; 154 VKUpdateDescriptorQueue update_descriptor_queue;
192 BlitImageHelper blit_image; 155 BlitImageHelper blit_image;
193 ASTCDecoderPass astc_decoder_pass; 156 ASTCDecoderPass astc_decoder_pass;
194 157 RenderPassCache render_pass_cache;
195 GraphicsPipelineCacheKey graphics_key;
196 158
197 TextureCacheRuntime texture_cache_runtime; 159 TextureCacheRuntime texture_cache_runtime;
198 TextureCache texture_cache; 160 TextureCache texture_cache;
199 BufferCacheRuntime buffer_cache_runtime; 161 BufferCacheRuntime buffer_cache_runtime;
200 BufferCache buffer_cache; 162 BufferCache buffer_cache;
201 VKPipelineCache pipeline_cache; 163 PipelineCache pipeline_cache;
202 VKQueryCache query_cache; 164 VKQueryCache query_cache;
203 AccelerateDMA accelerate_dma; 165 AccelerateDMA accelerate_dma;
204 VKFenceManager fence_manager; 166 VKFenceManager fence_manager;
205 167
206 vk::Event wfi_event; 168 vk::Event wfi_event;
207 VideoCommon::Shader::AsyncShaders async_shaders;
208 169
209 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; 170 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
210 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; 171 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
new file mode 100644
index 000000000..451ffe019
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -0,0 +1,96 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <unordered_map>
6
7#include <boost/container/static_vector.hpp>
8
9#include "video_core/renderer_vulkan/maxwell_to_vk.h"
10#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
11#include "video_core/surface.h"
12#include "video_core/vulkan_common/vulkan_device.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h"
14
15namespace Vulkan {
16namespace {
17using VideoCore::Surface::PixelFormat;
18
19VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
20 VkSampleCountFlagBits samples) {
21 using MaxwellToVK::SurfaceFormat;
22 return {
23 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
24 .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
25 .samples = samples,
26 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
27 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
28 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
29 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
30 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
31 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
32 };
33}
34} // Anonymous namespace
35
36RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
37
38VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
39 std::lock_guard lock{mutex};
40 const auto [pair, is_new] = cache.try_emplace(key);
41 if (!is_new) {
42 return *pair->second;
43 }
44 boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
45 std::array<VkAttachmentReference, 8> references{};
46 u32 num_attachments{};
47 u32 num_colors{};
48 for (size_t index = 0; index < key.color_formats.size(); ++index) {
49 const PixelFormat format{key.color_formats[index]};
50 const bool is_valid{format != PixelFormat::Invalid};
51 references[index] = VkAttachmentReference{
52 .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
53 .layout = VK_IMAGE_LAYOUT_GENERAL,
54 };
55 if (is_valid) {
56 descriptions.push_back(AttachmentDescription(*device, format, key.samples));
57 num_attachments = static_cast<u32>(index + 1);
58 ++num_colors;
59 }
60 }
61 const bool has_depth{key.depth_format != PixelFormat::Invalid};
62 VkAttachmentReference depth_reference{};
63 if (key.depth_format != PixelFormat::Invalid) {
64 depth_reference = VkAttachmentReference{
65 .attachment = num_colors,
66 .layout = VK_IMAGE_LAYOUT_GENERAL,
67 };
68 descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
69 }
70 const VkSubpassDescription subpass{
71 .flags = 0,
72 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
73 .inputAttachmentCount = 0,
74 .pInputAttachments = nullptr,
75 .colorAttachmentCount = num_attachments,
76 .pColorAttachments = references.data(),
77 .pResolveAttachments = nullptr,
78 .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr,
79 .preserveAttachmentCount = 0,
80 .pPreserveAttachments = nullptr,
81 };
82 pair->second = device->GetLogical().CreateRenderPass({
83 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
84 .pNext = nullptr,
85 .flags = 0,
86 .attachmentCount = static_cast<u32>(descriptions.size()),
87 .pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
88 .subpassCount = 1,
89 .pSubpasses = &subpass,
90 .dependencyCount = 0,
91 .pDependencies = nullptr,
92 });
93 return *pair->second;
94}
95
96} // namespace Vulkan
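A note on the attachment bookkeeping above: references[] is indexed by the render-target slot in the key, while descriptions[] only receives the formats that are actually valid, so colorAttachmentCount is set to the highest used slot plus one (num_attachments) rather than to the count of valid formats (num_colors). A small worked example, with a slot layout invented purely for illustration:

// key.color_formats = { RT0 = valid, RT1 = Invalid, RT2 = valid, RT3..RT7 = Invalid }
// references      = { {attachment = 0}, {attachment = VK_ATTACHMENT_UNUSED}, {attachment = 1}, ... }
// descriptions    = { desc(RT0), desc(RT2) }   // only the valid color formats; depth is appended last
// num_colors      = 2                          // indexes into descriptions
// num_attachments = 3                          // highest valid slot + 1 -> colorAttachmentCount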
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
new file mode 100644
index 000000000..eaa0ed775
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <mutex>
8#include <unordered_map>
9
10#include "video_core/surface.h"
11#include "video_core/vulkan_common/vulkan_wrapper.h"
12
13namespace Vulkan {
14
15struct RenderPassKey {
16 auto operator<=>(const RenderPassKey&) const noexcept = default;
17
18 std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
19 VideoCore::Surface::PixelFormat depth_format;
20 VkSampleCountFlagBits samples;
21};
22
23} // namespace Vulkan
24
25namespace std {
26template <>
27struct hash<Vulkan::RenderPassKey> {
28 [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
29 size_t value = static_cast<size_t>(key.depth_format) << 48;
30 value ^= static_cast<size_t>(key.samples) << 52;
31 for (size_t i = 0; i < key.color_formats.size(); ++i) {
32 value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
33 }
34 return value;
35 }
36};
37} // namespace std
38
39namespace Vulkan {
40
41class Device;
42
43class RenderPassCache {
44public:
45 explicit RenderPassCache(const Device& device_);
46
47 VkRenderPass Get(const RenderPassKey& key);
48
49private:
50 const Device* device{};
51 std::unordered_map<RenderPassKey, vk::RenderPass> cache;
52 std::mutex mutex;
53};
54
55} // namespace Vulkan
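For orientation, here is a minimal caller-side sketch of the new cache. It is hypothetical (the pixel-format enumerators are assumptions used only for illustration); the pieces taken from this diff are RenderPassKey, RenderPassCache::Get and the render_pass_cache member added to the rasterizer earlier in this change:

// Hypothetical usage sketch, not taken verbatim from the yuzu sources.
using VideoCore::Surface::PixelFormat;

Vulkan::RenderPassKey key{};
key.color_formats.fill(PixelFormat::Invalid);
key.color_formats[0] = PixelFormat::B8G8R8A8_UNORM; // assumed enumerator name
key.depth_format = PixelFormat::D32_FLOAT;          // assumed enumerator name
key.samples = VK_SAMPLE_COUNT_1_BIT;

// Get() is internally synchronized; equal keys always return the same VkRenderPass handle.
const VkRenderPass render_pass = render_pass_cache.Get(key);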
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index a8bf7bda8..2dd514968 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -10,18 +10,16 @@
10namespace Vulkan { 10namespace Vulkan {
11 11
12ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) 12ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
13 : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} 13 : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
14
15ResourcePool::~ResourcePool() = default;
16 14
17size_t ResourcePool::CommitResource() { 15size_t ResourcePool::CommitResource() {
18 // Refresh semaphore to query updated results 16 // Refresh semaphore to query updated results
19 master_semaphore.Refresh(); 17 master_semaphore->Refresh();
20 const u64 gpu_tick = master_semaphore.KnownGpuTick(); 18 const u64 gpu_tick = master_semaphore->KnownGpuTick();
21 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { 19 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
22 for (size_t iterator = begin; iterator < end; ++iterator) { 20 for (size_t iterator = begin; iterator < end; ++iterator) {
23 if (gpu_tick >= ticks[iterator]) { 21 if (gpu_tick >= ticks[iterator]) {
24 ticks[iterator] = master_semaphore.CurrentTick(); 22 ticks[iterator] = master_semaphore->CurrentTick();
25 return iterator; 23 return iterator;
26 } 24 }
27 } 25 }
@@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
36 // Both searches failed, the pool is full; handle it. 34 // Both searches failed, the pool is full; handle it.
37 const size_t free_resource = ManageOverflow(); 35 const size_t free_resource = ManageOverflow();
38 36
39 ticks[free_resource] = master_semaphore.CurrentTick(); 37 ticks[free_resource] = master_semaphore->CurrentTick();
40 found = free_resource; 38 found = free_resource;
41 } 39 }
42 } 40 }
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index 9d0bb3b4d..f0b80ad59 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -18,8 +18,16 @@ class MasterSemaphore;
18 */ 18 */
19class ResourcePool { 19class ResourcePool {
20public: 20public:
21 explicit ResourcePool() = default;
21 explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); 22 explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
22 virtual ~ResourcePool(); 23
24 virtual ~ResourcePool() = default;
25
26 ResourcePool& operator=(ResourcePool&&) noexcept = default;
27 ResourcePool(ResourcePool&&) noexcept = default;
28
29 ResourcePool& operator=(const ResourcePool&) = default;
30 ResourcePool(const ResourcePool&) = default;
23 31
24protected: 32protected:
25 size_t CommitResource(); 33 size_t CommitResource();
@@ -34,7 +42,7 @@ private:
34 /// Allocates a new page of resources. 42 /// Allocates a new page of resources.
35 void Grow(); 43 void Grow();
36 44
37 MasterSemaphore& master_semaphore; 45 MasterSemaphore* master_semaphore{};
38 size_t grow_step = 0; ///< Number of new resources created after an overflow 46 size_t grow_step = 0; ///< Number of new resources created after an overflow
 39 size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found 47 size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found
40 std::vector<u64> ticks; ///< Ticks for each resource 48 std::vector<u64> ticks; ///< Ticks for each resource
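The switch from a MasterSemaphore& member to a MasterSemaphore*, together with the defaulted move and copy operations added above, is what makes ResourcePool assignable (and therefore storable in movable containers): a class with a reference member gets its assignment operators implicitly deleted. A minimal sketch of the distinction, assuming nothing beyond standard C++:

struct RefMember {
    explicit RefMember(int& r) : ref{r} {}
    int& ref; // reference member: the implicitly-declared assignment operators are deleted
};

struct PtrMember {
    PtrMember() = default;
    explicit PtrMember(int& r) : ptr{&r} {}
    int* ptr{}; // pointer member: all special member functions can simply be defaulted
};

// int x = 0, y = 1;
// RefMember a{x}, b{y}; a = b;  // error: use of deleted assignment operator
// PtrMember c{x}, d{y}; c = d;  // fine: the pointer is rebound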
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f35c120b0..4840962de 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
31 command->~Command(); 31 command->~Command();
32 command = next; 32 command = next;
33 } 33 }
34 34 submit = false;
35 command_offset = 0; 35 command_offset = 0;
36 first = nullptr; 36 first = nullptr;
37 last = nullptr; 37 last = nullptr;
@@ -42,13 +42,16 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
42 master_semaphore{std::make_unique<MasterSemaphore>(device)}, 42 master_semaphore{std::make_unique<MasterSemaphore>(device)},
43 command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { 43 command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
44 AcquireNewChunk(); 44 AcquireNewChunk();
45 AllocateNewContext(); 45 AllocateWorkerCommandBuffer();
46 worker_thread = std::thread(&VKScheduler::WorkerThread, this); 46 worker_thread = std::thread(&VKScheduler::WorkerThread, this);
47} 47}
48 48
49VKScheduler::~VKScheduler() { 49VKScheduler::~VKScheduler() {
50 quit = true; 50 {
51 cv.notify_all(); 51 std::lock_guard lock{work_mutex};
52 quit = true;
53 }
54 work_cv.notify_all();
52 worker_thread.join(); 55 worker_thread.join();
53} 56}
54 57
@@ -60,6 +63,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) {
60void VKScheduler::Finish(VkSemaphore semaphore) { 63void VKScheduler::Finish(VkSemaphore semaphore) {
61 const u64 presubmit_tick = CurrentTick(); 64 const u64 presubmit_tick = CurrentTick();
62 SubmitExecution(semaphore); 65 SubmitExecution(semaphore);
66 WaitWorker();
63 Wait(presubmit_tick); 67 Wait(presubmit_tick);
64 AllocateNewContext(); 68 AllocateNewContext();
65} 69}
@@ -68,20 +72,19 @@ void VKScheduler::WaitWorker() {
68 MICROPROFILE_SCOPE(Vulkan_WaitForWorker); 72 MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
69 DispatchWork(); 73 DispatchWork();
70 74
71 bool finished = false; 75 std::unique_lock lock{work_mutex};
72 do { 76 wait_cv.wait(lock, [this] { return work_queue.empty(); });
73 cv.notify_all();
74 std::unique_lock lock{mutex};
75 finished = chunk_queue.Empty();
76 } while (!finished);
77} 77}
78 78
79void VKScheduler::DispatchWork() { 79void VKScheduler::DispatchWork() {
80 if (chunk->Empty()) { 80 if (chunk->Empty()) {
81 return; 81 return;
82 } 82 }
83 chunk_queue.Push(std::move(chunk)); 83 {
84 cv.notify_all(); 84 std::lock_guard lock{work_mutex};
85 work_queue.push(std::move(chunk));
86 }
87 work_cv.notify_one();
85 AcquireNewChunk(); 88 AcquireNewChunk();
86} 89}
87 90
@@ -124,93 +127,101 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() {
124 EndRenderPass(); 127 EndRenderPass();
125} 128}
126 129
127void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { 130bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
128 if (state.graphics_pipeline == pipeline) { 131 if (state.graphics_pipeline == pipeline) {
129 return; 132 return false;
130 } 133 }
131 state.graphics_pipeline = pipeline; 134 state.graphics_pipeline = pipeline;
132 Record([pipeline](vk::CommandBuffer cmdbuf) { 135 return true;
133 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
134 });
135} 136}
136 137
137void VKScheduler::WorkerThread() { 138void VKScheduler::WorkerThread() {
138 Common::SetCurrentThreadPriority(Common::ThreadPriority::High); 139 Common::SetCurrentThreadName("yuzu:VulkanWorker");
139 std::unique_lock lock{mutex};
140 do { 140 do {
141 cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); 141 if (work_queue.empty()) {
142 if (quit) { 142 wait_cv.notify_all();
143 continue; 143 }
144 std::unique_ptr<CommandChunk> work;
145 {
146 std::unique_lock lock{work_mutex};
147 work_cv.wait(lock, [this] { return !work_queue.empty() || quit; });
148 if (quit) {
149 continue;
150 }
151 work = std::move(work_queue.front());
152 work_queue.pop();
153 }
154 const bool has_submit = work->HasSubmit();
155 work->ExecuteAll(current_cmdbuf);
156 if (has_submit) {
157 AllocateWorkerCommandBuffer();
144 } 158 }
145 auto extracted_chunk = std::move(chunk_queue.Front()); 159 std::lock_guard reserve_lock{reserve_mutex};
146 chunk_queue.Pop(); 160 chunk_reserve.push_back(std::move(work));
147 extracted_chunk->ExecuteAll(current_cmdbuf);
148 chunk_reserve.Push(std::move(extracted_chunk));
149 } while (!quit); 161 } while (!quit);
150} 162}
151 163
164void VKScheduler::AllocateWorkerCommandBuffer() {
165 current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
166 current_cmdbuf.Begin({
167 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
168 .pNext = nullptr,
169 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
170 .pInheritanceInfo = nullptr,
171 });
172}
173
152void VKScheduler::SubmitExecution(VkSemaphore semaphore) { 174void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
153 EndPendingOperations(); 175 EndPendingOperations();
154 InvalidateState(); 176 InvalidateState();
155 WaitWorker();
156 177
157 std::unique_lock lock{mutex}; 178 const u64 signal_value = master_semaphore->NextTick();
179 Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
180 cmdbuf.End();
158 181
159 current_cmdbuf.End(); 182 const u32 num_signal_semaphores = semaphore ? 2U : 1U;
160 183
161 const VkSemaphore timeline_semaphore = master_semaphore->Handle(); 184 const u64 wait_value = signal_value - 1;
162 const u32 num_signal_semaphores = semaphore ? 2U : 1U; 185 const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
163 186
164 const u64 signal_value = master_semaphore->CurrentTick(); 187 const VkSemaphore timeline_semaphore = master_semaphore->Handle();
165 const u64 wait_value = signal_value - 1; 188 const std::array signal_values{signal_value, u64(0)};
166 const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 189 const std::array signal_semaphores{timeline_semaphore, semaphore};
167 190
168 master_semaphore->NextTick(); 191 const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
169 192 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
170 const std::array signal_values{signal_value, u64(0)}; 193 .pNext = nullptr,
171 const std::array signal_semaphores{timeline_semaphore, semaphore}; 194 .waitSemaphoreValueCount = 1,
172 195 .pWaitSemaphoreValues = &wait_value,
173 const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ 196 .signalSemaphoreValueCount = num_signal_semaphores,
174 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, 197 .pSignalSemaphoreValues = signal_values.data(),
175 .pNext = nullptr, 198 };
176 .waitSemaphoreValueCount = 1, 199 const VkSubmitInfo submit_info{
177 .pWaitSemaphoreValues = &wait_value, 200 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
178 .signalSemaphoreValueCount = num_signal_semaphores, 201 .pNext = &timeline_si,
179 .pSignalSemaphoreValues = signal_values.data(), 202 .waitSemaphoreCount = 1,
180 }; 203 .pWaitSemaphores = &timeline_semaphore,
181 const VkSubmitInfo submit_info{ 204 .pWaitDstStageMask = &wait_stage_mask,
182 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, 205 .commandBufferCount = 1,
183 .pNext = &timeline_si, 206 .pCommandBuffers = cmdbuf.address(),
184 .waitSemaphoreCount = 1, 207 .signalSemaphoreCount = num_signal_semaphores,
185 .pWaitSemaphores = &timeline_semaphore, 208 .pSignalSemaphores = signal_semaphores.data(),
186 .pWaitDstStageMask = &wait_stage_mask, 209 };
187 .commandBufferCount = 1, 210 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
188 .pCommandBuffers = current_cmdbuf.address(), 211 case VK_SUCCESS:
189 .signalSemaphoreCount = num_signal_semaphores, 212 break;
190 .pSignalSemaphores = signal_semaphores.data(), 213 case VK_ERROR_DEVICE_LOST:
191 }; 214 device.ReportLoss();
192 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { 215 [[fallthrough]];
193 case VK_SUCCESS: 216 default:
194 break; 217 vk::Check(result);
195 case VK_ERROR_DEVICE_LOST: 218 }
196 device.ReportLoss(); 219 });
197 [[fallthrough]]; 220 chunk->MarkSubmit();
198 default: 221 DispatchWork();
199 vk::Check(result);
200 }
201} 222}
202 223
203void VKScheduler::AllocateNewContext() { 224void VKScheduler::AllocateNewContext() {
204 std::unique_lock lock{mutex};
205
206 current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
207 current_cmdbuf.Begin({
208 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
209 .pNext = nullptr,
210 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
211 .pInheritanceInfo = nullptr,
212 });
213
214 // Enable counters once again. These are disabled when a command buffer is finished. 225 // Enable counters once again. These are disabled when a command buffer is finished.
215 if (query_cache) { 226 if (query_cache) {
216 query_cache->UpdateCounters(); 227 query_cache->UpdateCounters();
@@ -265,12 +276,13 @@ void VKScheduler::EndRenderPass() {
265} 276}
266 277
267void VKScheduler::AcquireNewChunk() { 278void VKScheduler::AcquireNewChunk() {
268 if (chunk_reserve.Empty()) { 279 std::lock_guard lock{reserve_mutex};
280 if (chunk_reserve.empty()) {
269 chunk = std::make_unique<CommandChunk>(); 281 chunk = std::make_unique<CommandChunk>();
270 return; 282 return;
271 } 283 }
272 chunk = std::move(chunk_reserve.Front()); 284 chunk = std::move(chunk_reserve.back());
273 chunk_reserve.Pop(); 285 chunk_reserve.pop_back();
274} 286}
275 287
276} // namespace Vulkan 288} // namespace Vulkan
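The scheduler's lock-free SPSC queues are replaced by a plain std::queue guarded by work_mutex, with work_cv waking the worker thread and wait_cv letting WaitWorker() block until the queue drains. A compact standalone sketch of that producer/consumer pattern, using placeholder names (Chunk, Worker) rather than the yuzu types:

#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <queue>

struct Chunk { /* recorded commands */ };

class Worker {
public:
    void Push(std::unique_ptr<Chunk> chunk) {
        {
            std::lock_guard lock{mutex};
            queue.push(std::move(chunk));
        }
        work_cv.notify_one(); // wake the consumer
    }

    void WaitIdle() {
        std::unique_lock lock{mutex};
        wait_cv.wait(lock, [this] { return queue.empty(); }); // mirrors WaitWorker()
    }

    void Run() { // consumer loop, mirrors VKScheduler::WorkerThread
        while (!quit) {
            std::unique_ptr<Chunk> work;
            {
                std::unique_lock lock{mutex};
                if (queue.empty()) {
                    wait_cv.notify_all(); // unblock WaitIdle()
                }
                work_cv.wait(lock, [this] { return !queue.empty() || quit; });
                if (quit) {
                    continue;
                }
                work = std::move(queue.front());
                queue.pop();
            }
            Execute(*work); // run outside the lock, like ExecuteAll()
        }
    }

    void Stop() {
        {
            std::lock_guard lock{mutex};
            quit = true;
        }
        work_cv.notify_all();
    }

private:
    void Execute(Chunk&) {}

    std::queue<std::unique_ptr<Chunk>> queue;
    std::mutex mutex;
    std::condition_variable work_cv;
    std::condition_variable wait_cv;
    std::atomic_bool quit{};
};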
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 3ce48e9d2..cf39a2363 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -8,12 +8,12 @@
8#include <condition_variable> 8#include <condition_variable>
9#include <cstddef> 9#include <cstddef>
10#include <memory> 10#include <memory>
11#include <stack>
12#include <thread> 11#include <thread>
13#include <utility> 12#include <utility>
13#include <queue>
14
14#include "common/alignment.h" 15#include "common/alignment.h"
15#include "common/common_types.h" 16#include "common/common_types.h"
16#include "common/threadsafe_queue.h"
17#include "video_core/renderer_vulkan/vk_master_semaphore.h" 17#include "video_core/renderer_vulkan/vk_master_semaphore.h"
18#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
19 19
@@ -22,6 +22,7 @@ namespace Vulkan {
22class CommandPool; 22class CommandPool;
23class Device; 23class Device;
24class Framebuffer; 24class Framebuffer;
25class GraphicsPipeline;
25class StateTracker; 26class StateTracker;
26class VKQueryCache; 27class VKQueryCache;
27 28
@@ -52,8 +53,8 @@ public:
52 /// of a renderpass. 53 /// of a renderpass.
53 void RequestOutsideRenderPassOperationContext(); 54 void RequestOutsideRenderPassOperationContext();
54 55
 55 /// Binds a pipeline to the current execution context. 56 /// Updates the graphics pipeline for the current execution context; returns true when it changed.
56 void BindGraphicsPipeline(VkPipeline pipeline); 57 bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
57 58
58 /// Invalidates current command buffer state except for render passes 59 /// Invalidates current command buffer state except for render passes
59 void InvalidateState(); 60 void InvalidateState();
@@ -85,6 +86,10 @@ public:
85 86
86 /// Waits for the given tick to trigger on the GPU. 87 /// Waits for the given tick to trigger on the GPU.
87 void Wait(u64 tick) { 88 void Wait(u64 tick) {
89 if (tick >= master_semaphore->CurrentTick()) {
90 // Make sure we are not waiting for the current tick without signalling
91 Flush();
92 }
88 master_semaphore->Wait(tick); 93 master_semaphore->Wait(tick);
89 } 94 }
90 95
@@ -154,15 +159,24 @@ private:
154 return true; 159 return true;
155 } 160 }
156 161
162 void MarkSubmit() {
163 submit = true;
164 }
165
157 bool Empty() const { 166 bool Empty() const {
158 return command_offset == 0; 167 return command_offset == 0;
159 } 168 }
160 169
170 bool HasSubmit() const {
171 return submit;
172 }
173
161 private: 174 private:
162 Command* first = nullptr; 175 Command* first = nullptr;
163 Command* last = nullptr; 176 Command* last = nullptr;
164 177
165 size_t command_offset = 0; 178 size_t command_offset = 0;
179 bool submit = false;
166 alignas(std::max_align_t) std::array<u8, 0x8000> data{}; 180 alignas(std::max_align_t) std::array<u8, 0x8000> data{};
167 }; 181 };
168 182
@@ -170,11 +184,13 @@ private:
170 VkRenderPass renderpass = nullptr; 184 VkRenderPass renderpass = nullptr;
171 VkFramebuffer framebuffer = nullptr; 185 VkFramebuffer framebuffer = nullptr;
172 VkExtent2D render_area = {0, 0}; 186 VkExtent2D render_area = {0, 0};
173 VkPipeline graphics_pipeline = nullptr; 187 GraphicsPipeline* graphics_pipeline = nullptr;
174 }; 188 };
175 189
176 void WorkerThread(); 190 void WorkerThread();
177 191
192 void AllocateWorkerCommandBuffer();
193
178 void SubmitExecution(VkSemaphore semaphore); 194 void SubmitExecution(VkSemaphore semaphore);
179 195
180 void AllocateNewContext(); 196 void AllocateNewContext();
@@ -204,11 +220,13 @@ private:
204 std::array<VkImage, 9> renderpass_images{}; 220 std::array<VkImage, 9> renderpass_images{};
205 std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; 221 std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
206 222
207 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; 223 std::queue<std::unique_ptr<CommandChunk>> work_queue;
208 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; 224 std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
209 std::mutex mutex; 225 std::mutex reserve_mutex;
210 std::condition_variable cv; 226 std::mutex work_mutex;
211 bool quit = false; 227 std::condition_variable work_cv;
228 std::condition_variable wait_cv;
229 std::atomic_bool quit{};
212}; 230};
213 231
214} // namespace Vulkan 232} // namespace Vulkan
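One behavioural detail worth noting in the header changes: Wait() now flushes before blocking whenever the requested tick has not been submitted yet, because a timeline semaphore value can only be signalled by the submission that carries it. A condensed restatement of the guard with explanatory comments (same logic as the lines added above; tick numbers are illustrative):

// Suppose CurrentTick() == 42 and nothing has been submitted for tick 42 yet:
// Wait(42) without a Flush() would block forever, since the GPU only signals 42
// as part of the submit that advances the semaphore.
if (tick >= master_semaphore->CurrentTick()) {
    Flush(); // submit pending work so the requested tick can actually be signalled
}
master_semaphore->Wait(tick);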
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
deleted file mode 100644
index c6846d886..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ /dev/null
@@ -1,3166 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <functional>
6#include <limits>
7#include <map>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include <fmt/format.h>
14
15#include <sirit/sirit.h>
16
17#include "common/alignment.h"
18#include "common/assert.h"
19#include "common/common_types.h"
20#include "common/logging/log.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/engines/shader_bytecode.h"
23#include "video_core/engines/shader_header.h"
24#include "video_core/engines/shader_type.h"
25#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
26#include "video_core/shader/node.h"
27#include "video_core/shader/shader_ir.h"
28#include "video_core/shader/transform_feedback.h"
29#include "video_core/vulkan_common/vulkan_device.h"
30
31namespace Vulkan {
32
33namespace {
34
35using Sirit::Id;
36using Tegra::Engines::ShaderType;
37using Tegra::Shader::Attribute;
38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register;
40using namespace VideoCommon::Shader;
41
42using Maxwell = Tegra::Engines::Maxwell3D::Regs;
43using Operation = const OperationNode&;
44
45class ASTDecompiler;
46class ExprDecompiler;
47
48// TODO(Rodrigo): Use rasterizer's value
49constexpr u32 MaxConstBufferFloats = 0x4000;
50constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4;
51
52constexpr u32 NumInputPatches = 32; // This value seems to be the standard
53
54enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
55
56class Expression final {
57public:
58 Expression(Id id_, Type type_) : id{id_}, type{type_} {
59 ASSERT(type_ != Type::Void);
60 }
61 Expression() : type{Type::Void} {}
62
63 Id id{};
64 Type type{};
65};
66static_assert(std::is_standard_layout_v<Expression>);
67
68struct TexelBuffer {
69 Id image_type{};
70 Id image{};
71};
72
73struct SampledImage {
74 Id image_type{};
75 Id sampler_type{};
76 Id sampler_pointer_type{};
77 Id variable{};
78};
79
80struct StorageImage {
81 Id image_type{};
82 Id image{};
83};
84
85struct AttributeType {
86 Type type;
87 Id scalar;
88 Id vector;
89};
90
91struct VertexIndices {
92 std::optional<u32> position;
93 std::optional<u32> layer;
94 std::optional<u32> viewport;
95 std::optional<u32> point_size;
96 std::optional<u32> clip_distances;
97};
98
99struct GenericVaryingDescription {
100 Id id = nullptr;
101 u32 first_element = 0;
102 bool is_scalar = false;
103};
104
105spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
106 ASSERT(!sampler.is_buffer);
107 switch (sampler.type) {
108 case Tegra::Shader::TextureType::Texture1D:
109 return spv::Dim::Dim1D;
110 case Tegra::Shader::TextureType::Texture2D:
111 return spv::Dim::Dim2D;
112 case Tegra::Shader::TextureType::Texture3D:
113 return spv::Dim::Dim3D;
114 case Tegra::Shader::TextureType::TextureCube:
115 return spv::Dim::Cube;
116 default:
117 UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type);
118 return spv::Dim::Dim2D;
119 }
120}
121
122std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
123 switch (image.type) {
124 case Tegra::Shader::ImageType::Texture1D:
125 return {spv::Dim::Dim1D, false};
126 case Tegra::Shader::ImageType::TextureBuffer:
127 return {spv::Dim::Buffer, false};
128 case Tegra::Shader::ImageType::Texture1DArray:
129 return {spv::Dim::Dim1D, true};
130 case Tegra::Shader::ImageType::Texture2D:
131 return {spv::Dim::Dim2D, false};
132 case Tegra::Shader::ImageType::Texture2DArray:
133 return {spv::Dim::Dim2D, true};
134 case Tegra::Shader::ImageType::Texture3D:
135 return {spv::Dim::Dim3D, false};
136 default:
137 UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type);
138 return {spv::Dim::Dim2D, false};
139 }
140}
141
142/// Returns the number of vertices present in a primitive topology.
143u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) {
144 switch (primitive_topology) {
145 case Maxwell::PrimitiveTopology::Points:
146 return 1;
147 case Maxwell::PrimitiveTopology::Lines:
148 case Maxwell::PrimitiveTopology::LineLoop:
149 case Maxwell::PrimitiveTopology::LineStrip:
150 return 2;
151 case Maxwell::PrimitiveTopology::Triangles:
152 case Maxwell::PrimitiveTopology::TriangleStrip:
153 case Maxwell::PrimitiveTopology::TriangleFan:
154 return 3;
155 case Maxwell::PrimitiveTopology::LinesAdjacency:
156 case Maxwell::PrimitiveTopology::LineStripAdjacency:
157 return 4;
158 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
159 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
160 return 6;
161 case Maxwell::PrimitiveTopology::Quads:
162 UNIMPLEMENTED_MSG("Quads");
163 return 3;
164 case Maxwell::PrimitiveTopology::QuadStrip:
165 UNIMPLEMENTED_MSG("QuadStrip");
166 return 3;
167 case Maxwell::PrimitiveTopology::Polygon:
168 UNIMPLEMENTED_MSG("Polygon");
169 return 3;
170 case Maxwell::PrimitiveTopology::Patches:
171 UNIMPLEMENTED_MSG("Patches");
172 return 3;
173 default:
174 UNREACHABLE();
175 return 3;
176 }
177}
178
179spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) {
180 switch (primitive) {
181 case Maxwell::TessellationPrimitive::Isolines:
182 return spv::ExecutionMode::Isolines;
183 case Maxwell::TessellationPrimitive::Triangles:
184 return spv::ExecutionMode::Triangles;
185 case Maxwell::TessellationPrimitive::Quads:
186 return spv::ExecutionMode::Quads;
187 }
188 UNREACHABLE();
189 return spv::ExecutionMode::Triangles;
190}
191
192spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) {
193 switch (spacing) {
194 case Maxwell::TessellationSpacing::Equal:
195 return spv::ExecutionMode::SpacingEqual;
196 case Maxwell::TessellationSpacing::FractionalOdd:
197 return spv::ExecutionMode::SpacingFractionalOdd;
198 case Maxwell::TessellationSpacing::FractionalEven:
199 return spv::ExecutionMode::SpacingFractionalEven;
200 }
201 UNREACHABLE();
202 return spv::ExecutionMode::SpacingEqual;
203}
204
205spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) {
206 switch (input_topology) {
207 case Maxwell::PrimitiveTopology::Points:
208 return spv::ExecutionMode::InputPoints;
209 case Maxwell::PrimitiveTopology::Lines:
210 case Maxwell::PrimitiveTopology::LineLoop:
211 case Maxwell::PrimitiveTopology::LineStrip:
212 return spv::ExecutionMode::InputLines;
213 case Maxwell::PrimitiveTopology::Triangles:
214 case Maxwell::PrimitiveTopology::TriangleStrip:
215 case Maxwell::PrimitiveTopology::TriangleFan:
216 return spv::ExecutionMode::Triangles;
217 case Maxwell::PrimitiveTopology::LinesAdjacency:
218 case Maxwell::PrimitiveTopology::LineStripAdjacency:
219 return spv::ExecutionMode::InputLinesAdjacency;
220 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
221 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
222 return spv::ExecutionMode::InputTrianglesAdjacency;
223 case Maxwell::PrimitiveTopology::Quads:
224 UNIMPLEMENTED_MSG("Quads");
225 return spv::ExecutionMode::Triangles;
226 case Maxwell::PrimitiveTopology::QuadStrip:
227 UNIMPLEMENTED_MSG("QuadStrip");
228 return spv::ExecutionMode::Triangles;
229 case Maxwell::PrimitiveTopology::Polygon:
230 UNIMPLEMENTED_MSG("Polygon");
231 return spv::ExecutionMode::Triangles;
232 case Maxwell::PrimitiveTopology::Patches:
233 UNIMPLEMENTED_MSG("Patches");
234 return spv::ExecutionMode::Triangles;
235 }
236 UNREACHABLE();
237 return spv::ExecutionMode::Triangles;
238}
239
240spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) {
241 switch (output_topology) {
242 case Tegra::Shader::OutputTopology::PointList:
243 return spv::ExecutionMode::OutputPoints;
244 case Tegra::Shader::OutputTopology::LineStrip:
245 return spv::ExecutionMode::OutputLineStrip;
246 case Tegra::Shader::OutputTopology::TriangleStrip:
247 return spv::ExecutionMode::OutputTriangleStrip;
248 default:
249 UNREACHABLE();
250 return spv::ExecutionMode::OutputPoints;
251 }
252}
253
254/// Returns true if an attribute index is one of the 32 generic attributes
255constexpr bool IsGenericAttribute(Attribute::Index attribute) {
256 return attribute >= Attribute::Index::Attribute_0 &&
257 attribute <= Attribute::Index::Attribute_31;
258}
259
260/// Returns the location of a generic attribute
261u32 GetGenericAttributeLocation(Attribute::Index attribute) {
262 ASSERT(IsGenericAttribute(attribute));
263 return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0);
264}
265
266/// Returns true if an object has to be treated as precise
267bool IsPrecise(Operation operand) {
268 const auto& meta{operand.GetMeta()};
269 if (std::holds_alternative<MetaArithmetic>(meta)) {
270 return std::get<MetaArithmetic>(meta).precise;
271 }
272 return false;
273}
274
275class SPIRVDecompiler final : public Sirit::Module {
276public:
277 explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_,
278 const Registry& registry_, const Specialization& specialization_)
279 : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()},
280 registry{registry_}, specialization{specialization_} {
281 if (stage_ != ShaderType::Compute) {
282 transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo());
283 }
284
285 AddCapability(spv::Capability::Shader);
286 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
287 AddCapability(spv::Capability::ImageQuery);
288 AddCapability(spv::Capability::Image1D);
289 AddCapability(spv::Capability::ImageBuffer);
290 AddCapability(spv::Capability::ImageGatherExtended);
291 AddCapability(spv::Capability::SampledBuffer);
292 AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
293 AddCapability(spv::Capability::DrawParameters);
294 AddCapability(spv::Capability::SubgroupBallotKHR);
295 AddCapability(spv::Capability::SubgroupVoteKHR);
296 AddExtension("SPV_KHR_16bit_storage");
297 AddExtension("SPV_KHR_shader_ballot");
298 AddExtension("SPV_KHR_subgroup_vote");
299 AddExtension("SPV_KHR_storage_buffer_storage_class");
300 AddExtension("SPV_KHR_variable_pointers");
301 AddExtension("SPV_KHR_shader_draw_parameters");
302
303 if (!transform_feedback.empty()) {
304 if (device.IsExtTransformFeedbackSupported()) {
305 AddCapability(spv::Capability::TransformFeedback);
306 } else {
307 LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
308 "supported on this device");
309 }
310 }
311 if (ir.UsesLayer() || ir.UsesViewportIndex()) {
312 if (ir.UsesViewportIndex()) {
313 AddCapability(spv::Capability::MultiViewport);
314 }
315 if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) {
316 AddExtension("SPV_EXT_shader_viewport_index_layer");
317 AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
318 }
319 }
320 if (device.IsFormatlessImageLoadSupported()) {
321 AddCapability(spv::Capability::StorageImageReadWithoutFormat);
322 }
323 if (device.IsFloat16Supported()) {
324 AddCapability(spv::Capability::Float16);
325 }
326 t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half");
327 t_half = Name(TypeVector(t_scalar_half, 2), "half");
328
329 const Id main = Decompile();
330
331 switch (stage) {
332 case ShaderType::Vertex:
333 AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces);
334 break;
335 case ShaderType::TesselationControl:
336 AddCapability(spv::Capability::Tessellation);
337 AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces);
338 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
339 header.common2.threads_per_input_primitive);
340 break;
341 case ShaderType::TesselationEval: {
342 const auto& info = registry.GetGraphicsInfo();
343 AddCapability(spv::Capability::Tessellation);
344 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
345 AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
346 AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
347 AddExecutionMode(main, info.tessellation_clockwise
348 ? spv::ExecutionMode::VertexOrderCw
349 : spv::ExecutionMode::VertexOrderCcw);
350 break;
351 }
352 case ShaderType::Geometry: {
353 const auto& info = registry.GetGraphicsInfo();
354 AddCapability(spv::Capability::Geometry);
355 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
356 AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
357 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
358 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
359 header.common4.max_output_vertices);
360 // TODO(Rodrigo): Where can we get this info from?
361 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
362 break;
363 }
364 case ShaderType::Fragment:
365 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
366 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
367 if (header.ps.omap.depth) {
368 AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
369 }
370 if (specialization.early_fragment_tests) {
371 AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
372 }
373 break;
374 case ShaderType::Compute:
375 const auto workgroup_size = specialization.workgroup_size;
376 AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
377 workgroup_size[1], workgroup_size[2]);
378 AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces);
379 break;
380 }
381 }
382
383private:
384 Id Decompile() {
385 DeclareCommon();
386 DeclareVertex();
387 DeclareTessControl();
388 DeclareTessEval();
389 DeclareGeometry();
390 DeclareFragment();
391 DeclareCompute();
392 DeclareRegisters();
393 DeclareCustomVariables();
394 DeclarePredicates();
395 DeclareLocalMemory();
396 DeclareSharedMemory();
397 DeclareInternalFlags();
398 DeclareInputAttributes();
399 DeclareOutputAttributes();
400
401 u32 binding = specialization.base_binding;
402 binding = DeclareConstantBuffers(binding);
403 binding = DeclareGlobalBuffers(binding);
404 binding = DeclareUniformTexels(binding);
405 binding = DeclareSamplers(binding);
406 binding = DeclareStorageTexels(binding);
407 binding = DeclareImages(binding);
408
409 const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
410 AddLabel();
411
412 if (ir.IsDecompiled()) {
413 DeclareFlowVariables();
414 DecompileAST();
415 } else {
416 AllocateLabels();
417 DecompileBranchMode();
418 }
419
420 OpReturn();
421 OpFunctionEnd();
422
423 return main;
424 }
425
426 void DefinePrologue() {
427 if (stage == ShaderType::Vertex) {
428 // Clear Position to avoid reading trash on the Z conversion.
429 const auto position_index = out_indices.position.value();
430 const Id position = AccessElement(t_out_float4, out_vertex, position_index);
431 OpStore(position, v_varying_default);
432
433 if (specialization.point_size) {
434 const u32 point_size_index = out_indices.point_size.value();
435 const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index);
436 OpStore(out_point_size, Constant(t_float, *specialization.point_size));
437 }
438 }
439 }
440
441 void DecompileAST();
442
443 void DecompileBranchMode() {
444 const u32 first_address = ir.GetBasicBlocks().begin()->first;
445 const Id loop_label = OpLabel("loop");
446 const Id merge_label = OpLabel("merge");
447 const Id dummy_label = OpLabel();
448 const Id jump_label = OpLabel();
449 continue_label = OpLabel("continue");
450
451 std::vector<Sirit::Literal> literals;
452 std::vector<Id> branch_labels;
453 for (const auto& [literal, label] : labels) {
454 literals.push_back(literal);
455 branch_labels.push_back(label);
456 }
457
458 jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
459 spv::StorageClass::Function, Constant(t_uint, first_address));
460 AddLocalVariable(jmp_to);
461
462 std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
463 std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
464
465 Name(jmp_to, "jmp_to");
466 Name(ssy_flow_stack, "ssy_flow_stack");
467 Name(ssy_flow_stack_top, "ssy_flow_stack_top");
468 Name(pbk_flow_stack, "pbk_flow_stack");
469 Name(pbk_flow_stack_top, "pbk_flow_stack_top");
470
471 DefinePrologue();
472
473 OpBranch(loop_label);
474 AddLabel(loop_label);
475 OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
476 OpBranch(dummy_label);
477
478 AddLabel(dummy_label);
479 const Id default_branch = OpLabel();
480 const Id jmp_to_load = OpLoad(t_uint, jmp_to);
481 OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone);
482 OpSwitch(jmp_to_load, default_branch, literals, branch_labels);
483
484 AddLabel(default_branch);
485 OpReturn();
486
487 for (const auto& [address, bb] : ir.GetBasicBlocks()) {
488 AddLabel(labels.at(address));
489
490 VisitBasicBlock(bb);
491
492 const auto next_it = labels.lower_bound(address + 1);
493 const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
494 OpBranch(next_label);
495 }
496
497 AddLabel(jump_label);
498 OpBranch(continue_label);
499 AddLabel(continue_label);
500 OpBranch(loop_label);
501 AddLabel(merge_label);
502 }
503
504private:
505 friend class ASTDecompiler;
506 friend class ExprDecompiler;
507
508 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
509
510 void AllocateLabels() {
511 for (const auto& pair : ir.GetBasicBlocks()) {
512 const u32 address = pair.first;
513 labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
514 }
515 }
516
517 void DeclareCommon() {
518 thread_id =
519 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
520 thread_masks[0] =
521 DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
522 thread_masks[1] =
523 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
524 thread_masks[2] =
525 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
526 thread_masks[3] =
527 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
528 thread_masks[4] =
529 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
530 }
531
532 void DeclareVertex() {
533 if (stage != ShaderType::Vertex) {
534 return;
535 }
536 Id out_vertex_struct;
537 std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
538 const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
539 out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output);
540 interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
541
542 // Declare input attributes
543 vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index");
544 instance_index =
545 DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index");
546 base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex");
547 base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance");
548 }
549
550 void DeclareTessControl() {
551 if (stage != ShaderType::TesselationControl) {
552 return;
553 }
554 DeclareInputVertexArray(NumInputPatches);
555 DeclareOutputVertexArray(header.common2.threads_per_input_primitive);
556
557 tess_level_outer = DeclareBuiltIn(
558 spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output,
559 TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))),
560 "tess_level_outer");
561 Decorate(tess_level_outer, spv::Decoration::Patch);
562
563 tess_level_inner = DeclareBuiltIn(
564 spv::BuiltIn::TessLevelInner, spv::StorageClass::Output,
565 TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))),
566 "tess_level_inner");
567 Decorate(tess_level_inner, spv::Decoration::Patch);
568
569 invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id");
570 }
571
572 void DeclareTessEval() {
573 if (stage != ShaderType::TesselationEval) {
574 return;
575 }
576 DeclareInputVertexArray(NumInputPatches);
577 DeclareOutputVertex();
578
579 tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord");
580 }
581
582 void DeclareGeometry() {
583 if (stage != ShaderType::Geometry) {
584 return;
585 }
586 const auto& info = registry.GetGraphicsInfo();
587 const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
588 DeclareInputVertexArray(num_input);
589 DeclareOutputVertex();
590 }
591
592 void DeclareFragment() {
593 if (stage != ShaderType::Fragment) {
594 return;
595 }
596
597 for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) {
598 if (!IsRenderTargetEnabled(rt)) {
599 continue;
600 }
601 const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
602 Name(id, fmt::format("frag_color{}", rt));
603 Decorate(id, spv::Decoration::Location, rt);
604
605 frag_colors[rt] = id;
606 interfaces.push_back(id);
607 }
608
609 if (header.ps.omap.depth) {
610 frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output));
611 Name(frag_depth, "frag_depth");
612 Decorate(frag_depth, spv::Decoration::BuiltIn,
613 static_cast<u32>(spv::BuiltIn::FragDepth));
614
615 interfaces.push_back(frag_depth);
616 }
617
618 frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord");
619 front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing");
620 point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord");
621 }
622
623 void DeclareCompute() {
624 if (stage != ShaderType::Compute) {
625 return;
626 }
627
628 workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id");
629 local_invocation_id =
630 DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id");
631 }
632
633 void DeclareRegisters() {
634 for (const u32 gpr : ir.GetRegisters()) {
635 const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
636 Name(id, fmt::format("gpr_{}", gpr));
637 registers.emplace(gpr, AddGlobalVariable(id));
638 }
639 }
640
641 void DeclareCustomVariables() {
642 const u32 num_custom_variables = ir.GetNumCustomVariables();
643 for (u32 i = 0; i < num_custom_variables; ++i) {
644 const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
645 Name(id, fmt::format("custom_var_{}", i));
646 custom_variables.emplace(i, AddGlobalVariable(id));
647 }
648 }
649
650 void DeclarePredicates() {
651 for (const auto pred : ir.GetPredicates()) {
652 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
653 Name(id, fmt::format("pred_{}", static_cast<u32>(pred)));
654 predicates.emplace(pred, AddGlobalVariable(id));
655 }
656 }
657
658 void DeclareFlowVariables() {
659 for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
660 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
661 Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
662 flow_variables.emplace(i, AddGlobalVariable(id));
663 }
664 }
665
666 void DeclareLocalMemory() {
667 // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
668 // specialization time.
669 const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize();
670 if (lmem_size == 0) {
671 return;
672 }
673 const auto element_count = static_cast<u32>(Common::AlignUp(lmem_size, 4) / 4);
674 const Id type_array = TypeArray(t_float, Constant(t_uint, element_count));
675 const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array);
676 Name(type_pointer, "LocalMemory");
677
678 local_memory =
679 OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array));
680 AddGlobalVariable(Name(local_memory, "local_memory"));
681 }
682
683 void DeclareSharedMemory() {
684 if (stage != ShaderType::Compute) {
685 return;
686 }
687 t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
688
689 u32 smem_size = specialization.shared_memory_size * 4;
690 if (smem_size == 0) {
691 // Avoid declaring an empty array.
692 return;
693 }
694 const u32 limit = device.GetMaxComputeSharedMemorySize();
695 if (smem_size > limit) {
696 LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
697 smem_size, limit);
698 smem_size = limit;
699 }
700
701 const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
702 const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
703 Name(type_pointer, "SharedMemory");
704
705 shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup);
706 AddGlobalVariable(Name(shared_memory, "shared_memory"));
707 }
708
709 void DeclareInternalFlags() {
710 static constexpr std::array names{"zero", "sign", "carry", "overflow"};
711
712 for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
713 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
714 internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
715 }
716 }
717
718 void DeclareInputVertexArray(u32 length) {
719 constexpr auto storage = spv::StorageClass::Input;
720 std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length);
721 }
722
723 void DeclareOutputVertexArray(u32 length) {
724 constexpr auto storage = spv::StorageClass::Output;
725 std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length);
726 }
727
728 std::tuple<VertexIndices, Id> DeclareVertexArray(spv::StorageClass storage_class,
729 std::string name, u32 length) {
730 const auto [struct_id, indices] = DeclareVertexStruct();
731 const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length));
732 const Id vertex_ptr = TypePointer(storage_class, vertex_array);
733 const Id vertex = OpVariable(vertex_ptr, storage_class);
734 AddGlobalVariable(Name(vertex, std::move(name)));
735 interfaces.push_back(vertex);
736 return {indices, vertex};
737 }
738
739 void DeclareOutputVertex() {
740 Id out_vertex_struct;
741 std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
742 const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
743 out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output);
744 interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
745 }
746
747 void DeclareInputAttributes() {
748 for (const auto index : ir.GetInputAttributes()) {
749 if (!IsGenericAttribute(index)) {
750 continue;
751 }
752 const u32 location = GetGenericAttributeLocation(index);
753 if (!IsAttributeEnabled(location)) {
754 continue;
755 }
756 const auto type_descriptor = GetAttributeType(location);
757 Id type;
758 if (IsInputAttributeArray()) {
759 type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3);
760 type = TypeArray(type, Constant(t_uint, GetNumInputVertices()));
761 type = TypePointer(spv::StorageClass::Input, type);
762 } else {
763 type = type_descriptor.vector;
764 }
765 const Id id = OpVariable(type, spv::StorageClass::Input);
766 AddGlobalVariable(Name(id, fmt::format("in_attr{}", location)));
767 input_attributes.emplace(index, id);
768 interfaces.push_back(id);
769
770 Decorate(id, spv::Decoration::Location, location);
771
772 if (stage != ShaderType::Fragment) {
773 continue;
774 }
775 switch (header.ps.GetPixelImap(location)) {
776 case PixelImap::Constant:
777 Decorate(id, spv::Decoration::Flat);
778 break;
779 case PixelImap::Perspective:
780 // Default
781 break;
782 case PixelImap::ScreenLinear:
783 Decorate(id, spv::Decoration::NoPerspective);
784 break;
785 default:
786 UNREACHABLE_MSG("Unused attribute being fetched");
787 }
788 }
789 }
790
791 void DeclareOutputAttributes() {
792 if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
793 return;
794 }
795
796 UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
797 for (const auto index : ir.GetOutputAttributes()) {
798 if (!IsGenericAttribute(index)) {
799 continue;
800 }
801 DeclareOutputAttribute(index);
802 }
803 }
804
805 void DeclareOutputAttribute(Attribute::Index index) {
806 static constexpr std::string_view swizzle = "xyzw";
807
808 const u32 location = GetGenericAttributeLocation(index);
809 u8 element = 0;
810 while (element < 4) {
811 const std::size_t remainder = 4 - element;
812
813 std::size_t num_components = remainder;
814 const std::optional tfb = GetTransformFeedbackInfo(index, element);
815 if (tfb) {
816 num_components = tfb->components;
817 }
818
819 Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
820 Id varying_default = v_varying_default;
821 if (IsOutputAttributeArray()) {
822 const u32 num = GetNumOutputVertices();
823 type = TypeArray(type, Constant(t_uint, num));
824 if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
 824 // Intel's proprietary driver fails to set up defaults for arrayed output
826 // attributes.
827 varying_default = ConstantComposite(type, std::vector(num, varying_default));
828 }
829 }
830 type = TypePointer(spv::StorageClass::Output, type);
831
832 std::string name = fmt::format("out_attr{}", location);
833 if (num_components < 4 || element > 0) {
834 name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
835 }
836
837 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
838 Name(AddGlobalVariable(id), name);
839
840 GenericVaryingDescription description;
841 description.id = id;
842 description.first_element = element;
843 description.is_scalar = num_components == 1;
844 for (u32 i = 0; i < num_components; ++i) {
845 const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
846 output_attributes.emplace(offset, description);
847 }
848 interfaces.push_back(id);
849
850 Decorate(id, spv::Decoration::Location, location);
851 if (element > 0) {
852 Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
853 }
854 if (tfb && device.IsExtTransformFeedbackSupported()) {
855 Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
856 Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
857 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
858 }
859
860 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
861 }
862 }
863
864 std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
865 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
866 const auto it = transform_feedback.find(location);
867 if (it == transform_feedback.end()) {
868 return {};
869 }
870 return it->second;
871 }
872
873 u32 DeclareConstantBuffers(u32 binding) {
874 for (const auto& [index, size] : ir.GetConstantBuffers()) {
875 const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
876 : t_cbuf_std140_ubo;
877 const Id id = OpVariable(type, spv::StorageClass::Uniform);
878 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
879
880 Decorate(id, spv::Decoration::Binding, binding++);
881 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
882 constant_buffers.emplace(index, id);
883 }
884 return binding;
885 }
886
887 u32 DeclareGlobalBuffers(u32 binding) {
888 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
889 const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
890 AddGlobalVariable(
891 Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset)));
892
893 Decorate(id, spv::Decoration::Binding, binding++);
894 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
895 global_buffers.emplace(base, id);
896 }
897 return binding;
898 }
899
900 u32 DeclareUniformTexels(u32 binding) {
901 for (const auto& sampler : ir.GetSamplers()) {
902 if (!sampler.is_buffer) {
903 continue;
904 }
905 ASSERT(!sampler.is_array);
906 ASSERT(!sampler.is_shadow);
907
908 constexpr auto dim = spv::Dim::Buffer;
909 constexpr int depth = 0;
910 constexpr int arrayed = 0;
911 constexpr bool ms = false;
912 constexpr int sampled = 1;
913 constexpr auto format = spv::ImageFormat::Unknown;
914 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
915 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
916 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
917 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
918 Decorate(id, spv::Decoration::Binding, binding++);
919 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
920
921 uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
922 }
923 return binding;
924 }
925
926 u32 DeclareSamplers(u32 binding) {
927 for (const auto& sampler : ir.GetSamplers()) {
928 if (sampler.is_buffer) {
929 continue;
930 }
931 const auto dim = GetSamplerDim(sampler);
932 const int depth = sampler.is_shadow ? 1 : 0;
933 const int arrayed = sampler.is_array ? 1 : 0;
934 constexpr bool ms = false;
935 constexpr int sampled = 1;
936 constexpr auto format = spv::ImageFormat::Unknown;
937 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
938 const Id sampler_type = TypeSampledImage(image_type);
939 const Id sampler_pointer_type =
940 TypePointer(spv::StorageClass::UniformConstant, sampler_type);
941 const Id type = sampler.is_indexed
942 ? TypeArray(sampler_type, Constant(t_uint, sampler.size))
943 : sampler_type;
944 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type);
945 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
946 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
947 Decorate(id, spv::Decoration::Binding, binding++);
948 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
949
950 sampled_images.emplace(
951 sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id});
952 }
953 return binding;
954 }
955
956 u32 DeclareStorageTexels(u32 binding) {
957 for (const auto& image : ir.GetImages()) {
958 if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
959 continue;
960 }
961 DeclareImage(image, binding);
962 }
963 return binding;
964 }
965
966 u32 DeclareImages(u32 binding) {
967 for (const auto& image : ir.GetImages()) {
968 if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
969 continue;
970 }
971 DeclareImage(image, binding);
972 }
973 return binding;
974 }
975
976 void DeclareImage(const ImageEntry& image, u32& binding) {
977 const auto [dim, arrayed] = GetImageDim(image);
978 constexpr int depth = 0;
979 constexpr bool ms = false;
980 constexpr int sampled = 2; // This won't be accessed with a sampler
981 const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
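        // R32ui is chosen for atomic images because Vulkan only guarantees image atomics on
        // 32-bit integer formats; non-atomic images stay formatless (Unknown), and ImageLoad
        // below returns zero when the device cannot read formatless images.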
982 const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
983 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
984 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
985 AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
986
987 Decorate(id, spv::Decoration::Binding, binding++);
988 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
989 if (image.is_read && !image.is_written) {
990 Decorate(id, spv::Decoration::NonWritable);
991 } else if (image.is_written && !image.is_read) {
992 Decorate(id, spv::Decoration::NonReadable);
993 }
994
995 images.emplace(image.index, StorageImage{image_type, id});
996 }
997
998 bool IsRenderTargetEnabled(u32 rt) const {
999 for (u32 component = 0; component < 4; ++component) {
1000 if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
1001 return true;
1002 }
1003 }
1004 return false;
1005 }
1006
1007 bool IsInputAttributeArray() const {
1008 return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval ||
1009 stage == ShaderType::Geometry;
1010 }
1011
1012 bool IsOutputAttributeArray() const {
1013 return stage == ShaderType::TesselationControl;
1014 }
1015
1016 bool IsAttributeEnabled(u32 location) const {
1017 return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
1018 }
1019
1020 u32 GetNumInputVertices() const {
1021 switch (stage) {
1022 case ShaderType::Geometry:
1023 return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
1024 case ShaderType::TesselationControl:
1025 case ShaderType::TesselationEval:
1026 return NumInputPatches;
1027 default:
1028 UNREACHABLE();
1029 return 1;
1030 }
1031 }
1032
1033 u32 GetNumOutputVertices() const {
1034 switch (stage) {
1035 case ShaderType::TesselationControl:
1036 return header.common2.threads_per_input_primitive;
1037 default:
1038 UNREACHABLE();
1039 return 1;
1040 }
1041 }
1042
1043 std::tuple<Id, VertexIndices> DeclareVertexStruct() {
1044 struct BuiltIn {
1045 Id type;
1046 spv::BuiltIn builtin;
1047 const char* name;
1048 };
1049 std::vector<BuiltIn> members;
1050 members.reserve(4);
1051
1052 const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) {
1053 const auto index = static_cast<u32>(members.size());
1054 members.push_back(BuiltIn{type, builtin, name});
1055 return index;
1056 };
1057
1058 VertexIndices indices;
1059 indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position");
1060
1061 if (ir.UsesLayer()) {
1062 if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
1063 indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer");
1064 } else {
1065 LOG_ERROR(
1066 Render_Vulkan,
1067 "Shader requires Layer but it's not supported on this stage with this device.");
1068 }
1069 }
1070
1071 if (ir.UsesViewportIndex()) {
1072 if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
1073 indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index");
1074 } else {
1075 LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on "
1076 "this stage with this device.");
1077 }
1078 }
1079
1080 if (ir.UsesPointSize() || specialization.point_size) {
1081 indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size");
1082 }
1083
1084 const auto& ir_output_attributes = ir.GetOutputAttributes();
1085 const bool declare_clip_distances = std::any_of(
1086 ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) {
1087 return index == Attribute::Index::ClipDistances0123 ||
1088 index == Attribute::Index::ClipDistances4567;
1089 });
1090 if (declare_clip_distances) {
1091 indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)),
1092 spv::BuiltIn::ClipDistance, "clip_distances");
1093 }
1094
1095 std::vector<Id> member_types;
1096 member_types.reserve(members.size());
1097 for (std::size_t i = 0; i < members.size(); ++i) {
1098 member_types.push_back(members[i].type);
1099 }
1100 const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex");
1101 Decorate(per_vertex_struct, spv::Decoration::Block);
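        // Illustrative GLSL equivalent of the block being built here (gl_Position is always
        // present, the remaining members only when the shader uses them):
        //     out gl_PerVertex {
        //         vec4  gl_Position;
        //         int   gl_Layer;
        //         int   gl_ViewportIndex;
        //         float gl_PointSize;
        //         float gl_ClipDistance[8];
        //     };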
1102
1103 for (std::size_t index = 0; index < members.size(); ++index) {
1104 const auto& member = members[index];
1105 MemberName(per_vertex_struct, static_cast<u32>(index), member.name);
1106 MemberDecorate(per_vertex_struct, static_cast<u32>(index), spv::Decoration::BuiltIn,
1107 static_cast<u32>(member.builtin));
1108 }
1109
1110 return {per_vertex_struct, indices};
1111 }
1112
1113 void VisitBasicBlock(const NodeBlock& bb) {
1114 for (const auto& node : bb) {
1115 Visit(node);
1116 }
1117 }
1118
1119 Expression Visit(const Node& node) {
1120 if (const auto operation = std::get_if<OperationNode>(&*node)) {
1121 if (const auto amend_index = operation->GetAmendIndex()) {
1122 [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
1123 ASSERT(type == Type::Void);
1124 }
1125 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
1126 const auto decompiler = operation_decompilers[operation_index];
1127 if (decompiler == nullptr) {
1128 UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
1129 }
1130 return (this->*decompiler)(*operation);
1131 }
1132
1133 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1134 const u32 index = gpr->GetIndex();
1135 if (index == Register::ZeroIndex) {
1136 return {v_float_zero, Type::Float};
1137 }
1138 return {OpLoad(t_float, registers.at(index)), Type::Float};
1139 }
1140
1141 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1142 const u32 index = cv->GetIndex();
1143 return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
1144 }
1145
1146 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1147 return {Constant(t_uint, immediate->GetValue()), Type::Uint};
1148 }
1149
1150 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1151 const auto value = [&]() -> Id {
1152 switch (const auto index = predicate->GetIndex(); index) {
1153 case Tegra::Shader::Pred::UnusedIndex:
1154 return v_true;
1155 case Tegra::Shader::Pred::NeverExecute:
1156 return v_false;
1157 default:
1158 return OpLoad(t_bool, predicates.at(index));
1159 }
1160 }();
1161 if (predicate->IsNegated()) {
1162 return {OpLogicalNot(t_bool, value), Type::Bool};
1163 }
1164 return {value, Type::Bool};
1165 }
1166
1167 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1168 const auto attribute = abuf->GetIndex();
1169 const u32 element = abuf->GetElement();
1170 const auto& buffer = abuf->GetBuffer();
1171
1172 const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
1173 std::vector<Id> members;
1174 members.reserve(std::size(indices) + 1);
1175
1176 if (buffer && IsInputAttributeArray()) {
1177 members.push_back(AsUint(Visit(buffer)));
1178 }
1179 for (const u32 index : indices) {
1180 members.push_back(Constant(t_uint, index));
1181 }
1182 return OpAccessChain(pointer_type, composite, members);
1183 };
1184
1185 switch (attribute) {
1186 case Attribute::Index::Position: {
1187 if (stage == ShaderType::Fragment) {
1188 return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
1189 Type::Float};
1190 }
1191 const std::vector elements = {in_indices.position.value(), element};
1192 return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float};
1193 }
1194 case Attribute::Index::PointCoord: {
1195 switch (element) {
1196 case 0:
1197 case 1:
1198 return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element),
1199 Type::Float};
1200 }
1201 UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element);
1202 return {v_float_zero, Type::Float};
1203 }
1204 case Attribute::Index::TessCoordInstanceIDVertexID:
1205 // TODO(Subv): Find out what the values are for the first two elements when inside a
1206 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
1207 // shader.
1208 switch (element) {
1209 case 0:
1210 case 1:
1211 return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)),
1212 Type::Float};
1213 case 2:
1214 return {
1215 OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)),
1216 Type::Int};
1217 case 3:
1218 return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)),
1219 Type::Int};
1220 }
1221             UNIMPLEMENTED_MSG("Unhandled TessCoordInstanceIDVertexID element={}", element);
1222 return {Constant(t_uint, 0U), Type::Uint};
1223 case Attribute::Index::FrontFacing:
1224 // TODO(Subv): Find out what the values are for the other elements.
1225 ASSERT(stage == ShaderType::Fragment);
1226 if (element == 3) {
1227 const Id is_front_facing = OpLoad(t_bool, front_facing);
1228 const Id true_value = Constant(t_int, static_cast<s32>(-1));
1229 const Id false_value = Constant(t_int, 0);
1230 return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int};
1231 }
1232                 UNIMPLEMENTED_MSG("Unhandled FrontFacing element={}", element);
1233 return {v_float_zero, Type::Float};
1234 default:
1235 if (!IsGenericAttribute(attribute)) {
1236 break;
1237 }
1238 const u32 location = GetGenericAttributeLocation(attribute);
1239 if (!IsAttributeEnabled(location)) {
1240 // Disabled attributes (also known as constant attributes) always return zero.
1241 return {v_float_zero, Type::Float};
1242 }
1243 const auto type_descriptor = GetAttributeType(location);
1244 const Type type = type_descriptor.type;
1245 const Id attribute_id = input_attributes.at(attribute);
1246 const std::vector elements = {element};
1247 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1248 return {OpLoad(GetTypeDefinition(type), pointer), type};
1249 }
1250 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
1251 return {v_float_zero, Type::Float};
1252 }
1253
1254 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1255 const Node& offset = cbuf->GetOffset();
1256 const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
1257
1258 Id pointer{};
1259 if (device.IsKhrUniformBufferStandardLayoutSupported()) {
1260 const Id buffer_offset =
1261 OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U));
1262 pointer =
1263 OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset);
1264 } else {
1265 Id buffer_index{};
1266 Id buffer_element{};
1267 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1268 // Direct access
1269 const u32 offset_imm = immediate->GetValue();
1270 ASSERT(offset_imm % 4 == 0);
1271 buffer_index = Constant(t_uint, offset_imm / 16);
1272 buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
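                    // Worked example: a byte offset of 44 (0x2C) selects vec4 index 44 / 16 = 2
                    // and component (44 / 4) % 4 = 3, i.e. the .w element of the third vec4
                    // under std140 layout.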
1273 } else if (std::holds_alternative<OperationNode>(*offset)) {
1274 // Indirect access
1275 const Id offset_id = AsUint(Visit(offset));
1276 const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4));
1277 const Id final_offset =
1278 OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1));
1279 buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4));
1280 buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4));
1281 } else {
1282                     UNREACHABLE_MSG("Unhandled offset node type");
1283 }
1284 pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
1285 buffer_element);
1286 }
1287 return {OpLoad(t_float, pointer), Type::Float};
1288 }
1289
1290 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1291 return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
1292 }
1293
1294 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1295 Id address = AsUint(Visit(lmem->GetAddress()));
1296 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
1297 const Id pointer = OpAccessChain(t_prv_float, local_memory, address);
1298 return {OpLoad(t_float, pointer), Type::Float};
1299 }
1300
1301 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1302 return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
1303 }
1304
1305 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1306 const Id flag = internal_flags.at(static_cast<std::size_t>(internal_flag->GetFlag()));
1307 return {OpLoad(t_bool, flag), Type::Bool};
1308 }
1309
1310 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1311 if (const auto amend_index = conditional->GetAmendIndex()) {
1312 [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
1313 ASSERT(type == Type::Void);
1314 }
1315             // It's invalid to call conditional on nested nodes; use an operation instead
1316 const Id true_label = OpLabel();
1317 const Id skip_label = OpLabel();
1318 const Id condition = AsBool(Visit(conditional->GetCondition()));
1319 OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone);
1320 OpBranchConditional(condition, true_label, skip_label);
1321 AddLabel(true_label);
1322
1323 conditional_branch_set = true;
1324 inside_branch = false;
1325 VisitBasicBlock(conditional->GetCode());
1326 conditional_branch_set = false;
1327 if (!inside_branch) {
1328 OpBranch(skip_label);
1329 } else {
1330 inside_branch = false;
1331 }
1332 AddLabel(skip_label);
1333 return {};
1334 }
1335
1336 if (const auto comment = std::get_if<CommentNode>(&*node)) {
1337 if (device.HasDebuggingToolAttached()) {
1338 // We should insert comments with OpString instead of using named variables
1339 Name(OpUndef(t_int), comment->GetText());
1340 }
1341 return {};
1342 }
1343
1344 UNREACHABLE();
1345 return {};
1346 }
1347
1348 template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type>
1349 Expression Unary(Operation operation) {
1350 const Id type_def = GetTypeDefinition(result_type);
1351 const Id op_a = As(Visit(operation[0]), type_a);
1352
1353 const Id value = (this->*func)(type_def, op_a);
1354 if (IsPrecise(operation)) {
1355 Decorate(value, spv::Decoration::NoContraction);
1356 }
1357 return {value, result_type};
1358 }
1359
1360 template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type,
1361 Type type_b = type_a>
1362 Expression Binary(Operation operation) {
1363 const Id type_def = GetTypeDefinition(result_type);
1364 const Id op_a = As(Visit(operation[0]), type_a);
1365 const Id op_b = As(Visit(operation[1]), type_b);
1366
1367 const Id value = (this->*func)(type_def, op_a, op_b);
1368 if (IsPrecise(operation)) {
1369 Decorate(value, spv::Decoration::NoContraction);
1370 }
1371 return {value, result_type};
1372 }
1373
1374 template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type,
1375 Type type_b = type_a, Type type_c = type_b>
1376 Expression Ternary(Operation operation) {
1377 const Id type_def = GetTypeDefinition(result_type);
1378 const Id op_a = As(Visit(operation[0]), type_a);
1379 const Id op_b = As(Visit(operation[1]), type_b);
1380 const Id op_c = As(Visit(operation[2]), type_c);
1381
1382 const Id value = (this->*func)(type_def, op_a, op_b, op_c);
1383 if (IsPrecise(operation)) {
1384 Decorate(value, spv::Decoration::NoContraction);
1385 }
1386 return {value, result_type};
1387 }
1388
1389 template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type,
1390 Type type_b = type_a, Type type_c = type_b, Type type_d = type_c>
1391 Expression Quaternary(Operation operation) {
1392 const Id type_def = GetTypeDefinition(result_type);
1393 const Id op_a = As(Visit(operation[0]), type_a);
1394 const Id op_b = As(Visit(operation[1]), type_b);
1395 const Id op_c = As(Visit(operation[2]), type_c);
1396 const Id op_d = As(Visit(operation[3]), type_d);
1397
1398 const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d);
1399 if (IsPrecise(operation)) {
1400 Decorate(value, spv::Decoration::NoContraction);
1401 }
1402 return {value, result_type};
1403 }
1404
1405 Expression Assign(Operation operation) {
1406 const Node& dest = operation[0];
1407 const Node& src = operation[1];
1408
1409 Expression target{};
1410 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1411 if (gpr->GetIndex() == Register::ZeroIndex) {
1412                 // Writing to Register::ZeroIndex is a no-op, but we still have to visit its source
1413 // because it might have side effects.
1414 Visit(src);
1415 return {};
1416 }
1417 target = {registers.at(gpr->GetIndex()), Type::Float};
1418
1419 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1420 const auto& buffer = abuf->GetBuffer();
1421 const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
1422 std::vector<Id> members;
1423 members.reserve(std::size(indices) + 1);
1424
1425 if (buffer && IsOutputAttributeArray()) {
1426 members.push_back(AsUint(Visit(buffer)));
1427 }
1428 for (const u32 index : indices) {
1429 members.push_back(Constant(t_uint, index));
1430 }
1431 return OpAccessChain(pointer_type, composite, members);
1432 };
1433
1434 target = [&]() -> Expression {
1435 const u32 element = abuf->GetElement();
1436 switch (const auto attribute = abuf->GetIndex(); attribute) {
1437 case Attribute::Index::Position: {
1438 const u32 index = out_indices.position.value();
1439 return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float};
1440 }
1441 case Attribute::Index::LayerViewportPointSize:
1442 switch (element) {
1443 case 1: {
1444 if (!out_indices.layer) {
1445 return {};
1446 }
1447 const u32 index = out_indices.layer.value();
1448 return {AccessElement(t_out_int, out_vertex, index), Type::Int};
1449 }
1450 case 2: {
1451 if (!out_indices.viewport) {
1452 return {};
1453 }
1454 const u32 index = out_indices.viewport.value();
1455 return {AccessElement(t_out_int, out_vertex, index), Type::Int};
1456 }
1457 case 3: {
1458 const auto index = out_indices.point_size.value();
1459 return {AccessElement(t_out_float, out_vertex, index), Type::Float};
1460 }
1461 default:
1462 UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement());
1463 return {};
1464 }
1465 case Attribute::Index::ClipDistances0123: {
1466 const u32 index = out_indices.clip_distances.value();
1467 return {AccessElement(t_out_float, out_vertex, index, element), Type::Float};
1468 }
1469 case Attribute::Index::ClipDistances4567: {
1470 const u32 index = out_indices.clip_distances.value();
1471 return {AccessElement(t_out_float, out_vertex, index, element + 4),
1472 Type::Float};
1473 }
1474 default:
1475 if (IsGenericAttribute(attribute)) {
1476 const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
1477 const GenericVaryingDescription description = output_attributes.at(offset);
1478 const Id composite = description.id;
1479 std::vector<u32> indices;
1480 if (!description.is_scalar) {
1481 indices.push_back(element - description.first_element);
1482 }
1483 return {ArrayPass(t_out_float, composite, indices), Type::Float};
1484 }
1485 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1486 static_cast<u32>(attribute));
1487 return {};
1488 }
1489 }();
1490
1491 } else if (const auto patch = std::get_if<PatchNode>(&*dest)) {
1492 target = [&]() -> Expression {
1493 const u32 offset = patch->GetOffset();
1494 switch (offset) {
1495 case 0:
1496 case 1:
1497 case 2:
1498 case 3:
1499 return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float};
1500 case 4:
1501 case 5:
1502 return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float};
1503 }
1504 UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset);
1505 return {};
1506 }();
1507
1508 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1509 Id address = AsUint(Visit(lmem->GetAddress()));
1510 address = OpUDiv(t_uint, address, Constant(t_uint, 4));
1511 target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
1512
1513 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1514 target = {GetSharedMemoryPointer(*smem), Type::Uint};
1515
1516 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1517 target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
1518
1519 } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
1520 target = {custom_variables.at(cv->GetIndex()), Type::Float};
1521
1522 } else {
1523 UNIMPLEMENTED();
1524 }
1525
1526 if (!target.id) {
1527             // The assignment target could not be resolved above (target.id is null), so skip the store.
1528 return {};
1529 }
1530
1531 OpStore(target.id, As(Visit(src), target.type));
1532 return {};
1533 }
1534
1535 template <u32 offset>
1536 Expression FCastHalf(Operation operation) {
1537 const Id value = AsHalfFloat(Visit(operation[0]));
1538 return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)),
1539 Type::Float};
1540 }
1541
1542 Expression FSwizzleAdd(Operation operation) {
1543 const Id minus = Constant(t_float, -1.0f);
1544 const Id plus = v_float_one;
1545 const Id zero = v_float_zero;
1546 const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero);
1547 const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus);
1548
1549 Id mask = OpLoad(t_uint, thread_id);
1550 mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
1551 mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1));
1552 mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask);
1553 mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
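        // In other words: (thread_id % 4) selects a 2-bit field of the swizzle immediate in
        // operation[2], and that field indexes the lookup tables above to choose a per-thread
        // modifier of -1, +1 or 0 for each operand.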
1554
1555 const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask);
1556 const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask);
1557
1558 const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a);
1559 const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b);
1560 return {OpFAdd(t_float, op_a, op_b), Type::Float};
1561 }
1562
1563 Expression HNegate(Operation operation) {
1564 const bool is_f16 = device.IsFloat16Supported();
1565 const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000);
1566 const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000);
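        // These raw bit patterns encode -1.0 and +1.0: 0xbc00 / 0x3c00 in IEEE half precision,
        // 0xbf800000 / 0x3f800000 in single precision when half floats are emulated with f32.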
1567 const auto GetNegate = [&](std::size_t index) {
1568 return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one);
1569 };
1570 const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2));
1571 return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat};
1572 }
1573
1574 Expression HClamp(Operation operation) {
1575 const auto Pack = [&](std::size_t index) {
1576 const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index])));
1577 return OpCompositeConstruct(t_half, scalar, scalar);
1578 };
1579 const Id value = AsHalfFloat(Visit(operation[0]));
1580 const Id min = Pack(1);
1581 const Id max = Pack(2);
1582
1583 const Id clamped = OpFClamp(t_half, value, min, max);
1584 if (IsPrecise(operation)) {
1585 Decorate(clamped, spv::Decoration::NoContraction);
1586 }
1587 return {clamped, Type::HalfFloat};
1588 }
1589
1590 Expression HCastFloat(Operation operation) {
1591 const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
1592 return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat};
1593 }
1594
1595 Expression HUnpack(Operation operation) {
1596 Expression operand = Visit(operation[0]);
1597 const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta());
1598 if (type == Tegra::Shader::HalfType::H0_H1) {
1599 return operand;
1600 }
1601 const auto value = [&] {
1602 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1603 case Tegra::Shader::HalfType::F32:
1604 return GetHalfScalarFromFloat(AsFloat(operand));
1605 case Tegra::Shader::HalfType::H0_H0:
1606 return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0);
1607 case Tegra::Shader::HalfType::H1_H1:
1608 return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1);
1609 default:
1610 UNREACHABLE();
1611 return ConstantNull(t_half);
1612 }
1613 }();
1614 return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat};
1615 }
1616
1617 Expression HMergeF32(Operation operation) {
1618 const Id value = AsHalfFloat(Visit(operation[0]));
1619 return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float};
1620 }
1621
1622 template <u32 offset>
1623 Expression HMergeHN(Operation operation) {
1624 const Id target = AsHalfFloat(Visit(operation[0]));
1625 const Id source = AsHalfFloat(Visit(operation[1]));
1626 const Id object = OpCompositeExtract(t_scalar_half, source, offset);
1627 return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat};
1628 }
1629
1630 Expression HPack2(Operation operation) {
1631 const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
1632 const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1])));
1633 return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat};
1634 }
1635
1636 Expression LogicalAddCarry(Operation operation) {
1637 const Id op_a = AsUint(Visit(operation[0]));
1638 const Id op_b = AsUint(Visit(operation[1]));
1639
1640 const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
1641 const Id carry = OpCompositeExtract(t_uint, result, 1);
1642 return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
1643 }
1644
1645 Expression LogicalAssign(Operation operation) {
1646 const Node& dest = operation[0];
1647 const Node& src = operation[1];
1648
1649 Id target{};
1650 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1651 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1652
1653 const auto index = pred->GetIndex();
1654 switch (index) {
1655 case Tegra::Shader::Pred::NeverExecute:
1656 case Tegra::Shader::Pred::UnusedIndex:
1657 // Writing to these predicates is a no-op
1658 return {};
1659 }
1660 target = predicates.at(index);
1661
1662 } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
1663 target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
1664 }
1665
1666 OpStore(target, AsBool(Visit(src)));
1667 return {};
1668 }
1669
1670 Expression LogicalFOrdered(Operation operation) {
1671 // Emulate SPIR-V's OpOrdered
1672 const Id op_a = AsFloat(Visit(operation[0]));
1673 const Id op_b = AsFloat(Visit(operation[1]));
1674 const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a);
1675 const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b);
1676 return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool};
1677 }
1678
1679 Expression LogicalFUnordered(Operation operation) {
1680 // Emulate SPIR-V's OpUnordered
1681 const Id op_a = AsFloat(Visit(operation[0]));
1682 const Id op_b = AsFloat(Visit(operation[1]));
1683 const Id is_nan_a = OpIsNan(t_bool, op_a);
1684 const Id is_nan_b = OpIsNan(t_bool, op_b);
1685 return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool};
1686 }
1687
1688 Id GetTextureSampler(Operation operation) {
1689 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1690 ASSERT(!meta.sampler.is_buffer);
1691
1692 const auto& entry = sampled_images.at(meta.sampler.index);
1693 Id sampler = entry.variable;
1694 if (meta.sampler.is_indexed) {
1695 const Id index = AsInt(Visit(meta.index));
1696 sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index);
1697 }
1698 return OpLoad(entry.sampler_type, sampler);
1699 }
1700
1701 Id GetTextureImage(Operation operation) {
1702 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1703 const u32 index = meta.sampler.index;
1704 if (meta.sampler.is_buffer) {
1705 const auto& entry = uniform_texels.at(index);
1706 return OpLoad(entry.image_type, entry.image);
1707 } else {
1708 const auto& entry = sampled_images.at(index);
1709 return OpImage(entry.image_type, GetTextureSampler(operation));
1710 }
1711 }
1712
1713 Id GetImage(Operation operation) {
1714 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1715 const auto entry = images.at(meta.image.index);
1716 return OpLoad(entry.image_type, entry.image);
1717 }
1718
1719 Id AssembleVector(const std::vector<Id>& coords, Type type) {
1720 const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1);
1721 return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords);
1722 }
1723
1724 Id GetCoordinates(Operation operation, Type type) {
1725 std::vector<Id> coords;
1726 for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) {
1727 coords.push_back(As(Visit(operation[i]), type));
1728 }
1729 if (const auto meta = std::get_if<MetaTexture>(&operation.GetMeta())) {
1730 // Add array coordinate for textures
1731 if (meta->sampler.is_array) {
1732 Id array = AsInt(Visit(meta->array));
1733 if (type == Type::Float) {
1734 array = OpConvertSToF(t_float, array);
1735 }
1736 coords.push_back(array);
1737 }
1738 }
1739 return AssembleVector(coords, type);
1740 }
1741
1742 Id GetOffsetCoordinates(Operation operation) {
1743 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1744 std::vector<Id> coords;
1745 coords.reserve(meta.aoffi.size());
1746 for (const auto& coord : meta.aoffi) {
1747 coords.push_back(AsInt(Visit(coord)));
1748 }
1749 return AssembleVector(coords, Type::Int);
1750 }
1751
1752 std::pair<Id, Id> GetDerivatives(Operation operation) {
1753 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1754 const auto& derivatives = meta.derivates;
1755 ASSERT(derivatives.size() % 2 == 0);
1756
1757 const std::size_t components = derivatives.size() / 2;
1758 std::vector<Id> dx, dy;
1759 dx.reserve(components);
1760 dy.reserve(components);
1761 for (std::size_t index = 0; index < components; ++index) {
1762 dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0))));
1763 dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1))));
1764 }
1765 return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)};
1766 }
1767
1768 Expression GetTextureElement(Operation operation, Id sample_value, Type type) {
1769 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1770 const auto type_def = GetTypeDefinition(type);
1771 return {OpCompositeExtract(type_def, sample_value, meta.element), type};
1772 }
1773
1774 Expression Texture(Operation operation) {
1775 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1776
1777 const bool can_implicit = stage == ShaderType::Fragment;
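        // Implicit-LOD sampling needs implicit derivatives, which SPIR-V only allows in the
        // Fragment execution model; every other stage therefore forces an explicit LOD of zero
        // below.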
1778 const Id sampler = GetTextureSampler(operation);
1779 const Id coords = GetCoordinates(operation, Type::Float);
1780
1781 std::vector<Id> operands;
1782 spv::ImageOperandsMask mask{};
1783 if (meta.bias) {
1784 mask = mask | spv::ImageOperandsMask::Bias;
1785 operands.push_back(AsFloat(Visit(meta.bias)));
1786 }
1787
1788 if (!can_implicit) {
1789 mask = mask | spv::ImageOperandsMask::Lod;
1790 operands.push_back(v_float_zero);
1791 }
1792
1793 if (!meta.aoffi.empty()) {
1794 mask = mask | spv::ImageOperandsMask::Offset;
1795 operands.push_back(GetOffsetCoordinates(operation));
1796 }
1797
1798 if (meta.depth_compare) {
1799 // Depth sampling
1800 UNIMPLEMENTED_IF(meta.bias);
1801 const Id dref = AsFloat(Visit(meta.depth_compare));
1802 if (can_implicit) {
1803 return {
1804 OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands),
1805 Type::Float};
1806 } else {
1807 return {
1808 OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
1809 Type::Float};
1810 }
1811 }
1812
1813 Id texture;
1814 if (can_implicit) {
1815 texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands);
1816 } else {
1817 texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
1818 }
1819 return GetTextureElement(operation, texture, Type::Float);
1820 }
1821
1822 Expression TextureLod(Operation operation) {
1823 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1824
1825 const Id sampler = GetTextureSampler(operation);
1826 const Id coords = GetCoordinates(operation, Type::Float);
1827 const Id lod = AsFloat(Visit(meta.lod));
1828
1829 spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod;
1830 std::vector<Id> operands{lod};
1831
1832 if (!meta.aoffi.empty()) {
1833 mask = mask | spv::ImageOperandsMask::Offset;
1834 operands.push_back(GetOffsetCoordinates(operation));
1835 }
1836
1837 if (meta.sampler.is_shadow) {
1838 const Id dref = AsFloat(Visit(meta.depth_compare));
1839 return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
1840 Type::Float};
1841 }
1842 const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
1843 return GetTextureElement(operation, texture, Type::Float);
1844 }
1845
1846 Expression TextureGather(Operation operation) {
1847 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1848
1849 const Id coords = GetCoordinates(operation, Type::Float);
1850
1851 spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
1852 std::vector<Id> operands;
1853 Id texture{};
1854
1855 if (!meta.aoffi.empty()) {
1856 mask = mask | spv::ImageOperandsMask::Offset;
1857 operands.push_back(GetOffsetCoordinates(operation));
1858 }
1859
1860 if (meta.sampler.is_shadow) {
1861 texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
1862 AsFloat(Visit(meta.depth_compare)), mask, operands);
1863 } else {
1864 u32 component_value = 0;
1865 if (meta.component) {
1866 const auto component = std::get_if<ImmediateNode>(&*meta.component);
1867 ASSERT_MSG(component, "Component is not an immediate value");
1868 component_value = component->GetValue();
1869 }
1870 texture = OpImageGather(t_float4, GetTextureSampler(operation), coords,
1871 Constant(t_uint, component_value), mask, operands);
1872 }
1873 return GetTextureElement(operation, texture, Type::Float);
1874 }
1875
1876 Expression TextureQueryDimensions(Operation operation) {
1877 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1878 UNIMPLEMENTED_IF(!meta.aoffi.empty());
1879 UNIMPLEMENTED_IF(meta.depth_compare);
1880
1881 const auto image_id = GetTextureImage(operation);
1882 if (meta.element == 3) {
1883 return {OpImageQueryLevels(t_int, image_id), Type::Int};
1884 }
1885
1886 const Id lod = AsUint(Visit(operation[0]));
1887 const std::size_t coords_count = [&meta] {
1888 switch (const auto type = meta.sampler.type) {
1889 case Tegra::Shader::TextureType::Texture1D:
1890 return 1;
1891 case Tegra::Shader::TextureType::Texture2D:
1892 case Tegra::Shader::TextureType::TextureCube:
1893 return 2;
1894 case Tegra::Shader::TextureType::Texture3D:
1895 return 3;
1896 default:
1897 UNREACHABLE_MSG("Invalid texture type={}", type);
1898 return 2;
1899 }
1900 }();
1901
1902 if (meta.element >= coords_count) {
1903 return {v_float_zero, Type::Float};
1904 }
1905
1906 const std::array<Id, 3> types = {t_int, t_int2, t_int3};
1907 const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod);
1908 const Id size = OpCompositeExtract(t_int, sizes, meta.element);
1909 return {size, Type::Int};
1910 }
1911
1912 Expression TextureQueryLod(Operation operation) {
1913 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1914 UNIMPLEMENTED_IF(!meta.aoffi.empty());
1915 UNIMPLEMENTED_IF(meta.depth_compare);
1916
1917 if (meta.element >= 2) {
1918 UNREACHABLE_MSG("Invalid element");
1919 return {v_float_zero, Type::Float};
1920 }
1921 const auto sampler_id = GetTextureSampler(operation);
1922
1923 const Id multiplier = Constant(t_float, 256.0f);
1924 const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier);
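        // The float LOD reported by OpImageQueryLod is scaled by 256 and truncated to an
        // integer, which appears to match the fixed-point encoding (8 fractional bits) that the
        // guest instruction expects.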
1925
1926 const Id coords = GetCoordinates(operation, Type::Float);
1927 Id size = OpImageQueryLod(t_float2, sampler_id, coords);
1928 size = OpFMul(t_float2, size, multipliers);
1929 size = OpConvertFToS(t_int2, size);
1930 return GetTextureElement(operation, size, Type::Int);
1931 }
1932
1933 Expression TexelFetch(Operation operation) {
1934 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1935 UNIMPLEMENTED_IF(meta.depth_compare);
1936
1937 const Id image = GetTextureImage(operation);
1938 const Id coords = GetCoordinates(operation, Type::Int);
1939
1940 spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
1941 std::vector<Id> operands;
1942 Id fetch;
1943
1944 if (meta.lod && !meta.sampler.is_buffer) {
1945 mask = mask | spv::ImageOperandsMask::Lod;
1946 operands.push_back(AsInt(Visit(meta.lod)));
1947 }
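        // Buffer textures have no mip chain, so the Lod image operand is only attached for
        // non-buffer samplers.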
1948
1949 if (!meta.aoffi.empty()) {
1950 mask = mask | spv::ImageOperandsMask::Offset;
1951 operands.push_back(GetOffsetCoordinates(operation));
1952 }
1953
1954 fetch = OpImageFetch(t_float4, image, coords, mask, operands);
1955 return GetTextureElement(operation, fetch, Type::Float);
1956 }
1957
1958 Expression TextureGradient(Operation operation) {
1959 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1960 UNIMPLEMENTED_IF(!meta.aoffi.empty());
1961
1962 const Id sampler = GetTextureSampler(operation);
1963 const Id coords = GetCoordinates(operation, Type::Float);
1964 const auto [dx, dy] = GetDerivatives(operation);
1965 const std::vector grad = {dx, dy};
1966
1967 static constexpr auto mask = spv::ImageOperandsMask::Grad;
1968 const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad);
1969 return GetTextureElement(operation, texture, Type::Float);
1970 }
1971
1972 Expression ImageLoad(Operation operation) {
1973 if (!device.IsFormatlessImageLoadSupported()) {
1974 return {v_float_zero, Type::Float};
1975 }
1976
1977 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1978
1979 const Id coords = GetCoordinates(operation, Type::Int);
1980 const Id texel = OpImageRead(t_uint4, GetImage(operation), coords);
1981
1982 return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint};
1983 }
1984
1985 Expression ImageStore(Operation operation) {
1986 const auto meta{std::get<MetaImage>(operation.GetMeta())};
1987 std::vector<Id> colors;
1988 for (const auto& value : meta.values) {
1989 colors.push_back(AsUint(Visit(value)));
1990 }
1991
1992 const Id coords = GetCoordinates(operation, Type::Int);
1993 const Id texel = OpCompositeConstruct(t_uint4, colors);
1994
1995 OpImageWrite(GetImage(operation), coords, texel, {});
1996 return {};
1997 }
1998
1999 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
2000 Expression AtomicImage(Operation operation) {
2001 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2002 ASSERT(meta.values.size() == 1);
2003
2004 const Id coordinate = GetCoordinates(operation, Type::Int);
2005 const Id image = images.at(meta.image.index).image;
2006 const Id sample = v_uint_zero;
2007 const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
2008
2009 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
2010 const Id semantics = v_uint_zero;
2011 const Id value = AsUint(Visit(meta.values[0]));
2012 return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
2013 }
2014
2015 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
2016 Expression Atomic(Operation operation) {
2017 Id pointer;
2018 if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
2019 pointer = GetSharedMemoryPointer(*smem);
2020 } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
2021 pointer = GetGlobalMemoryPointer(*gmem);
2022 } else {
2023 UNREACHABLE();
2024 return {v_float_zero, Type::Float};
2025 }
2026 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
2027 const Id semantics = v_uint_zero;
2028 const Id value = AsUint(Visit(operation[1]));
2029
2030 return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
2031 }
2032
2033 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
2034 Expression Reduce(Operation operation) {
2035 Atomic<func>(operation);
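        // A reduction is an atomic whose result is unused, so the value returned by
        // Atomic<func> is intentionally discarded.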
2036 return {};
2037 }
2038
2039 Expression Branch(Operation operation) {
2040 const auto& target = std::get<ImmediateNode>(*operation[0]);
2041 OpStore(jmp_to, Constant(t_uint, target.GetValue()));
2042 OpBranch(continue_label);
2043 inside_branch = true;
2044 if (!conditional_branch_set) {
2045 AddLabel();
2046 }
2047 return {};
2048 }
2049
2050 Expression BranchIndirect(Operation operation) {
2051 const Id op_a = AsUint(Visit(operation[0]));
2052
2053 OpStore(jmp_to, op_a);
2054 OpBranch(continue_label);
2055 inside_branch = true;
2056 if (!conditional_branch_set) {
2057 AddLabel();
2058 }
2059 return {};
2060 }
2061
2062 Expression PushFlowStack(Operation operation) {
2063 const auto& target = std::get<ImmediateNode>(*operation[0]);
2064 const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
2065 const Id current = OpLoad(t_uint, flow_stack_top);
2066 const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1));
2067 const Id access = OpAccessChain(t_func_uint, flow_stack, current);
2068
2069 OpStore(access, Constant(t_uint, target.GetValue()));
2070 OpStore(flow_stack_top, next);
2071 return {};
2072 }
2073
2074 Expression PopFlowStack(Operation operation) {
2075 const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
2076 const Id current = OpLoad(t_uint, flow_stack_top);
2077 const Id previous = OpISub(t_uint, current, Constant(t_uint, 1));
2078 const Id access = OpAccessChain(t_func_uint, flow_stack, previous);
2079 const Id target = OpLoad(t_uint, access);
2080
2081 OpStore(flow_stack_top, previous);
2082 OpStore(jmp_to, target);
2083 OpBranch(continue_label);
2084 inside_branch = true;
2085 if (!conditional_branch_set) {
2086 AddLabel();
2087 }
2088 return {};
2089 }
2090
2091 Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) {
2092 using Compare = Maxwell::ComparisonOp;
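        // The alpha test path below drives this helper with the legacy ("*Old") comparison
        // values, so only those are mapped here.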
2093 switch (compare_op) {
2094 case Compare::NeverOld:
2095 return v_false; // Never let the test pass
2096 case Compare::LessOld:
2097 return OpFOrdLessThan(t_bool, operand_1, operand_2);
2098 case Compare::EqualOld:
2099 return OpFOrdEqual(t_bool, operand_1, operand_2);
2100 case Compare::LessEqualOld:
2101 return OpFOrdLessThanEqual(t_bool, operand_1, operand_2);
2102 case Compare::GreaterOld:
2103 return OpFOrdGreaterThan(t_bool, operand_1, operand_2);
2104 case Compare::NotEqualOld:
2105 return OpFOrdNotEqual(t_bool, operand_1, operand_2);
2106 case Compare::GreaterEqualOld:
2107 return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2);
2108 default:
2109 UNREACHABLE();
2110 return v_true;
2111 }
2112 }
2113
2114 void AlphaTest(Id pointer) {
2115 if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) {
2116 return;
2117 }
2118 const Id true_label = OpLabel();
2119 const Id discard_label = OpLabel();
2120 const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref);
2121 const Id alpha_value = OpLoad(t_float, pointer);
2122 const Id condition =
2123 MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference);
2124
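        // Fragments that fail the alpha comparison branch to discard_label and are killed with
        // OpKill; passing fragments continue at true_label.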
2125 OpBranchConditional(condition, true_label, discard_label);
2126 AddLabel(discard_label);
2127 OpKill();
2128 AddLabel(true_label);
2129 }
2130
2131 void PreExit() {
2132 if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
2133 const u32 position_index = out_indices.position.value();
2134 const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U);
2135 const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U);
2136 Id depth = OpLoad(t_float, z_pointer);
2137 depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer));
2138 depth = OpFMul(t_float, depth, Constant(t_float, 0.5f));
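            // z' = (z + w) * 0.5 remaps the guest's OpenGL-style clip-space depth range
            // [-w, w] onto Vulkan's [0, w].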
2139 OpStore(z_pointer, depth);
2140 }
2141 if (stage == ShaderType::Fragment) {
2142 const auto SafeGetRegister = [this](u32 reg) {
2143 if (const auto it = registers.find(reg); it != registers.end()) {
2144 return OpLoad(t_float, it->second);
2145 }
2146 return v_float_zero;
2147 };
2148
2149 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
2150 "Sample mask write is unimplemented");
2151
2152             // Write the color outputs using the data in the shader registers; disabled
2153             // render targets and components are skipped in the register assignment.
2154 u32 current_reg = 0;
2155 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
2156 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
2157 for (u32 component = 0; component < 4; ++component) {
2158 if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
2159 continue;
2160 }
2161 const Id pointer = AccessElement(t_out_float, frag_colors[rt], component);
2162 OpStore(pointer, SafeGetRegister(current_reg));
2163 if (rt == 0 && component == 3) {
2164 AlphaTest(pointer);
2165 }
2166 ++current_reg;
2167 }
2168 }
2169 if (header.ps.omap.depth) {
2170 // The depth output is always 2 registers after the last color output, and
2171 // current_reg already contains one past the last color register.
2172 OpStore(frag_depth, SafeGetRegister(current_reg + 1));
2173 }
2174 }
2175 }
2176
2177 Expression Exit(Operation operation) {
2178 PreExit();
2179 inside_branch = true;
2180 if (conditional_branch_set) {
2181 OpReturn();
2182 } else {
2183 const Id dummy = OpLabel();
2184 OpBranch(dummy);
2185 AddLabel(dummy);
2186 OpReturn();
2187 AddLabel();
2188 }
2189 return {};
2190 }
2191
2192 Expression Discard(Operation operation) {
2193 inside_branch = true;
2194 if (conditional_branch_set) {
2195 OpKill();
2196 } else {
2197 const Id dummy = OpLabel();
2198 OpBranch(dummy);
2199 AddLabel(dummy);
2200 OpKill();
2201 AddLabel();
2202 }
2203 return {};
2204 }
2205
2206 Expression EmitVertex(Operation) {
2207 OpEmitVertex();
2208 return {};
2209 }
2210
2211 Expression EndPrimitive(Operation operation) {
2212 OpEndPrimitive();
2213 return {};
2214 }
2215
2216 Expression InvocationId(Operation) {
2217 return {OpLoad(t_int, invocation_id), Type::Int};
2218 }
2219
2220 Expression YNegate(Operation) {
2221 LOG_WARNING(Render_Vulkan, "(STUBBED)");
2222 return {Constant(t_float, 1.0f), Type::Float};
2223 }
2224
2225 template <u32 element>
2226 Expression LocalInvocationId(Operation) {
2227 const Id id = OpLoad(t_uint3, local_invocation_id);
2228 return {OpCompositeExtract(t_uint, id, element), Type::Uint};
2229 }
2230
2231 template <u32 element>
2232 Expression WorkGroupId(Operation operation) {
2233 const Id id = OpLoad(t_uint3, workgroup_id);
2234 return {OpCompositeExtract(t_uint, id, element), Type::Uint};
2235 }
2236
2237 Expression BallotThread(Operation operation) {
2238 const Id predicate = AsBool(Visit(operation[0]));
2239 const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate);
2240
2241 if (!device.IsWarpSizePotentiallyBiggerThanGuest()) {
2242 // Guest-like devices can just return the first index.
2243 return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint};
2244 }
2245
2246 // The others will have to return what is local to the current thread.
2247 // For instance a device with a warp size of 64 will return the upper uint when the current
2248 // thread is 38.
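        // (38 >> 5 == 1, so thread 38 reads the second 32-bit word of the ballot result.)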
2249 const Id tid = OpLoad(t_uint, thread_id);
2250 const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5));
2251 return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint};
2252 }
2253
2254 template <Id (Module::*func)(Id, Id)>
2255 Expression Vote(Operation operation) {
2256 // TODO(Rodrigo): Handle devices with different warp sizes
2257 const Id predicate = AsBool(Visit(operation[0]));
2258 return {(this->*func)(t_bool, predicate), Type::Bool};
2259 }
2260
2261 Expression ThreadId(Operation) {
2262 return {OpLoad(t_uint, thread_id), Type::Uint};
2263 }
2264
2265 template <std::size_t index>
2266 Expression ThreadMask(Operation) {
2267 // TODO(Rodrigo): Handle devices with different warp sizes
2268 const Id mask = thread_masks[index];
2269 return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
2270 }
2271
2272 Expression ShuffleIndexed(Operation operation) {
2273 const Id value = AsFloat(Visit(operation[0]));
2274 const Id index = AsUint(Visit(operation[1]));
2275 return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
2276 }
2277
2278 Expression Barrier(Operation) {
2279 if (!ir.IsDecompiled()) {
2280 LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
2281 return {};
2282 }
2283
2284 const auto scope = spv::Scope::Workgroup;
2285 const auto memory = spv::Scope::Workgroup;
2286 const auto semantics =
2287 spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
2288 OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
2289 Constant(t_uint, static_cast<u32>(memory)),
2290 Constant(t_uint, static_cast<u32>(semantics)));
2291 return {};
2292 }
2293
2294 template <spv::Scope scope>
2295 Expression MemoryBarrier(Operation) {
2296 const auto semantics =
2297 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
2298 spv::MemorySemanticsMask::WorkgroupMemory |
2299 spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory;
2300
2301 OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)),
2302 Constant(t_uint, static_cast<u32>(semantics)));
2303 return {};
2304 }
2305
2306 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) {
2307 const Id id = OpVariable(type, storage);
2308 Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
2309 AddGlobalVariable(Name(id, std::move(name)));
2310 interfaces.push_back(id);
2311 return id;
2312 }
2313
2314 Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) {
2315 return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name));
2316 }
2317
2318 template <typename... Args>
2319 Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
2320 std::vector<Id> members;
2321 auto elements = {elements_...};
2322 for (const auto element : elements) {
2323 members.push_back(Constant(t_uint, element));
2324 }
2325
2326 return OpAccessChain(pointer_type, composite, members);
2327 }
2328
2329 Id As(Expression expr, Type wanted_type) {
2330 switch (wanted_type) {
2331 case Type::Bool:
2332 return AsBool(expr);
2333 case Type::Bool2:
2334 return AsBool2(expr);
2335 case Type::Float:
2336 return AsFloat(expr);
2337 case Type::Int:
2338 return AsInt(expr);
2339 case Type::Uint:
2340 return AsUint(expr);
2341 case Type::HalfFloat:
2342 return AsHalfFloat(expr);
2343 default:
2344 UNREACHABLE();
2345 return expr.id;
2346 }
2347 }
2348
2349 Id AsBool(Expression expr) {
2350 ASSERT(expr.type == Type::Bool);
2351 return expr.id;
2352 }
2353
2354 Id AsBool2(Expression expr) {
2355 ASSERT(expr.type == Type::Bool2);
2356 return expr.id;
2357 }
2358
2359 Id AsFloat(Expression expr) {
2360 switch (expr.type) {
2361 case Type::Float:
2362 return expr.id;
2363 case Type::Int:
2364 case Type::Uint:
2365 return OpBitcast(t_float, expr.id);
2366 case Type::HalfFloat:
2367 if (device.IsFloat16Supported()) {
2368 return OpBitcast(t_float, expr.id);
2369 }
2370 return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id));
2371 default:
2372 UNREACHABLE();
2373 return expr.id;
2374 }
2375 }
2376
2377 Id AsInt(Expression expr) {
2378 switch (expr.type) {
2379 case Type::Int:
2380 return expr.id;
2381 case Type::Float:
2382 case Type::Uint:
2383 return OpBitcast(t_int, expr.id);
2384 case Type::HalfFloat:
2385 if (device.IsFloat16Supported()) {
2386 return OpBitcast(t_int, expr.id);
2387 }
2388 return OpPackHalf2x16(t_int, expr.id);
2389 default:
2390 UNREACHABLE();
2391 return expr.id;
2392 }
2393 }
2394
2395 Id AsUint(Expression expr) {
2396 switch (expr.type) {
2397 case Type::Uint:
2398 return expr.id;
2399 case Type::Float:
2400 case Type::Int:
2401 return OpBitcast(t_uint, expr.id);
2402 case Type::HalfFloat:
2403 if (device.IsFloat16Supported()) {
2404 return OpBitcast(t_uint, expr.id);
2405 }
2406 return OpPackHalf2x16(t_uint, expr.id);
2407 default:
2408 UNREACHABLE();
2409 return expr.id;
2410 }
2411 }
2412
2413 Id AsHalfFloat(Expression expr) {
2414 switch (expr.type) {
2415 case Type::HalfFloat:
2416 return expr.id;
2417 case Type::Float:
2418 case Type::Int:
2419 case Type::Uint:
2420 if (device.IsFloat16Supported()) {
2421 return OpBitcast(t_half, expr.id);
2422 }
2423 return OpUnpackHalf2x16(t_half, AsUint(expr));
2424 default:
2425 UNREACHABLE();
2426 return expr.id;
2427 }
2428 }
2429
2430 Id GetHalfScalarFromFloat(Id value) {
2431 if (device.IsFloat16Supported()) {
2432 return OpFConvert(t_scalar_half, value);
2433 }
2434 return value;
2435 }
2436
2437 Id GetFloatFromHalfScalar(Id value) {
2438 if (device.IsFloat16Supported()) {
2439 return OpFConvert(t_float, value);
2440 }
2441 return value;
2442 }
2443
2444 AttributeType GetAttributeType(u32 location) const {
2445 if (stage != ShaderType::Vertex) {
2446 return {Type::Float, t_in_float, t_in_float4};
2447 }
2448 switch (specialization.attribute_types.at(location)) {
2449 case Maxwell::VertexAttribute::Type::SignedNorm:
2450 case Maxwell::VertexAttribute::Type::UnsignedNorm:
2451 case Maxwell::VertexAttribute::Type::UnsignedScaled:
2452 case Maxwell::VertexAttribute::Type::SignedScaled:
2453 case Maxwell::VertexAttribute::Type::Float:
2454 return {Type::Float, t_in_float, t_in_float4};
2455 case Maxwell::VertexAttribute::Type::SignedInt:
2456 return {Type::Int, t_in_int, t_in_int4};
2457 case Maxwell::VertexAttribute::Type::UnsignedInt:
2458 return {Type::Uint, t_in_uint, t_in_uint4};
2459 default:
2460 UNREACHABLE();
2461 return {Type::Float, t_in_float, t_in_float4};
2462 }
2463 }
2464
2465 Id GetTypeDefinition(Type type) const {
2466 switch (type) {
2467 case Type::Bool:
2468 return t_bool;
2469 case Type::Bool2:
2470 return t_bool2;
2471 case Type::Float:
2472 return t_float;
2473 case Type::Int:
2474 return t_int;
2475 case Type::Uint:
2476 return t_uint;
2477 case Type::HalfFloat:
2478 return t_half;
2479 default:
2480 UNREACHABLE();
2481 return {};
2482 }
2483 }
2484
2485 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
2486 switch (type) {
2487 case Type::Float:
2488 return {t_float, t_float2, t_float3, t_float4};
2489 case Type::Int:
2490 return {t_int, t_int2, t_int3, t_int4};
2491 case Type::Uint:
2492 return {t_uint, t_uint2, t_uint3, t_uint4};
2493 default:
2494 UNIMPLEMENTED();
2495 return {};
2496 }
2497 }
2498
2499 std::tuple<Id, Id> CreateFlowStack() {
2500 // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
2501 // that shaders will use 20 nested SSYs and PBKs.
2502 constexpr u32 FLOW_STACK_SIZE = 20;
2503 constexpr auto storage_class = spv::StorageClass::Function;
2504
2505 const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
2506 const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class,
2507 ConstantNull(flow_stack_type));
2508 const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0));
2509 AddLocalVariable(stack);
2510 AddLocalVariable(top);
2511 return std::tie(stack, top);
2512 }
2513
2514 std::pair<Id, Id> GetFlowStack(Operation operation) {
2515 const auto stack_class = std::get<MetaStackClass>(operation.GetMeta());
2516 switch (stack_class) {
2517 case MetaStackClass::Ssy:
2518 return {ssy_flow_stack, ssy_flow_stack_top};
2519 case MetaStackClass::Pbk:
2520 return {pbk_flow_stack, pbk_flow_stack_top};
2521 }
2522 UNREACHABLE();
2523 return {};
2524 }
2525
2526 Id GetGlobalMemoryPointer(const GmemNode& gmem) {
2527 const Id real = AsUint(Visit(gmem.GetRealAddress()));
2528 const Id base = AsUint(Visit(gmem.GetBaseAddress()));
2529 const Id diff = OpISub(t_uint, real, base);
2530 const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
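        // The byte distance from the mapped region's base is divided by 4 (>> 2) to index the
        // SSBO, which is assumed to be declared as an array of 32-bit words (t_gmem_uint).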
2531 const Id buffer = global_buffers.at(gmem.GetDescriptor());
2532 return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
2533 }
2534
2535 Id GetSharedMemoryPointer(const SmemNode& smem) {
2536 ASSERT(stage == ShaderType::Compute);
2537 Id address = AsUint(Visit(smem.GetAddress()));
2538 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
2539 return OpAccessChain(t_smem_uint, shared_memory, address);
2540 }
2541
2542 static constexpr std::array operation_decompilers = {
2543 &SPIRVDecompiler::Assign,
2544
2545 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
2546 Type::Float>,
2547
2548 &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>,
2549 &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>,
2550 &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>,
2551 &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>,
2552 &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
2553 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
2554 &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
2555 &SPIRVDecompiler::FCastHalf<0>,
2556 &SPIRVDecompiler::FCastHalf<1>,
2557 &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
2558 &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
2559 &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
2560 &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>,
2561 &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>,
2562 &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>,
2563 &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>,
2564 &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>,
2565 &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>,
2566 &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>,
2567 &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>,
2568 &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
2569 &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
2570 &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
2571 &SPIRVDecompiler::FSwizzleAdd,
2572
2573 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
2574 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
2575 &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>,
2576 &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>,
2577 &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>,
2578 &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>,
2579 &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>,
2580
2581 &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>,
2582 &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>,
2583 &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>,
2584 &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>,
2585 &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>,
2586 &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>,
2587 &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>,
2588 &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>,
2589 &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>,
2590 &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
2591 &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
2592 &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,
2593 &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>,
2594
2595 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>,
2596 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
2597 &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>,
2598 &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>,
2599 &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>,
2600 &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>,
2601 &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>,
2602 &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>,
2603 &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
2604 &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
2605 &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>,
2606 &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>,
2607 &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>,
2608 &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>,
2609 &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
2610 &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
2611 &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,
2612 &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>,
2613
2614 &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>,
2615 &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
2616 &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
2617 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
2618 &SPIRVDecompiler::HNegate,
2619 &SPIRVDecompiler::HClamp,
2620 &SPIRVDecompiler::HCastFloat,
2621 &SPIRVDecompiler::HUnpack,
2622 &SPIRVDecompiler::HMergeF32,
2623 &SPIRVDecompiler::HMergeHN<0>,
2624 &SPIRVDecompiler::HMergeHN<1>,
2625 &SPIRVDecompiler::HPack2,
2626
2627 &SPIRVDecompiler::LogicalAssign,
2628 &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>,
2629 &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>,
2630 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
2631 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
2632 &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2,
2633 Type::Uint>,
2634 &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>,
2635
2636 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
2637 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
2638 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>,
2639 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
2640 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
2641 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
2642 &SPIRVDecompiler::LogicalFOrdered,
2643 &SPIRVDecompiler::LogicalFUnordered,
2644 &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>,
2645 &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>,
2646 &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>,
2647 &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>,
2648 &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>,
2649 &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>,
2650
2651 &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
2652 &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
2653 &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>,
2654 &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>,
2655 &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>,
2656 &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>,
2657
2658 &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>,
2659 &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>,
2660 &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>,
2661 &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>,
2662 &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
2663 &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,
2664
2665 &SPIRVDecompiler::LogicalAddCarry,
2666
2667 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
2668 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
2669 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
2670 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
2671 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
2672 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
2673 // TODO(Rodrigo): Should these use the OpFUnord* variants?
2674 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
2675 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
2676 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
2677 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
2678 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
2679 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
2680
2681 &SPIRVDecompiler::Texture,
2682 &SPIRVDecompiler::TextureLod,
2683 &SPIRVDecompiler::TextureGather,
2684 &SPIRVDecompiler::TextureQueryDimensions,
2685 &SPIRVDecompiler::TextureQueryLod,
2686 &SPIRVDecompiler::TexelFetch,
2687 &SPIRVDecompiler::TextureGradient,
2688
2689 &SPIRVDecompiler::ImageLoad,
2690 &SPIRVDecompiler::ImageStore,
2691 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
2692 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
2693 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
2694 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
2695 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
2696
2697 &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
2698 &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
2699 &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
2700 &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
2701 &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
2702 &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
2703 &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
2704
2705 &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
2706 &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
2707 &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
2708 &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
2709 &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
2710 &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
2711 &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
2712
2713 &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
2714 &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
2715 &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
2716 &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
2717 &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
2718 &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
2719
2720 &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
2721 &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
2722 &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
2723 &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
2724 &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
2725 &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
2726
2727 &SPIRVDecompiler::Branch,
2728 &SPIRVDecompiler::BranchIndirect,
2729 &SPIRVDecompiler::PushFlowStack,
2730 &SPIRVDecompiler::PopFlowStack,
2731 &SPIRVDecompiler::Exit,
2732 &SPIRVDecompiler::Discard,
2733
2734 &SPIRVDecompiler::EmitVertex,
2735 &SPIRVDecompiler::EndPrimitive,
2736
2737 &SPIRVDecompiler::InvocationId,
2738 &SPIRVDecompiler::YNegate,
2739 &SPIRVDecompiler::LocalInvocationId<0>,
2740 &SPIRVDecompiler::LocalInvocationId<1>,
2741 &SPIRVDecompiler::LocalInvocationId<2>,
2742 &SPIRVDecompiler::WorkGroupId<0>,
2743 &SPIRVDecompiler::WorkGroupId<1>,
2744 &SPIRVDecompiler::WorkGroupId<2>,
2745
2746 &SPIRVDecompiler::BallotThread,
2747 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>,
2748 &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>,
2749 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
2750
2751 &SPIRVDecompiler::ThreadId,
2752 &SPIRVDecompiler::ThreadMask<0>, // Eq
2753 &SPIRVDecompiler::ThreadMask<1>, // Ge
2754 &SPIRVDecompiler::ThreadMask<2>, // Gt
2755 &SPIRVDecompiler::ThreadMask<3>, // Le
2756 &SPIRVDecompiler::ThreadMask<4>, // Lt
2757 &SPIRVDecompiler::ShuffleIndexed,
2758
2759 &SPIRVDecompiler::Barrier,
2760 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
2761 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
2762 };
2763 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
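The table above is indexed by the IR operation code, which the static_assert ties to OperationCode::Amount. Dispatch presumably reduces to a member-function-pointer call along these lines (GetCode is an assumed accessor; the body is a sketch, not the decompiler's actual Visit):

    // Sketch: map the operation code to its handler and invoke it on this decompiler.
    Id VisitOperation(Operation& operation) {
        const std::size_t index = static_cast<std::size_t>(operation.GetCode()); // GetCode assumed
        ASSERT(index < operation_decompilers.size());
        return (this->*operation_decompilers[index])(operation);
    }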
2764
2765 const Device& device;
2766 const ShaderIR& ir;
2767 const ShaderType stage;
2768 const Tegra::Shader::Header header;
2769 const Registry& registry;
2770 const Specialization& specialization;
2771 std::unordered_map<u8, VaryingTFB> transform_feedback;
2772
2773 const Id t_void = Name(TypeVoid(), "void");
2774
2775 const Id t_bool = Name(TypeBool(), "bool");
2776 const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2");
2777
2778 const Id t_int = Name(TypeInt(32, true), "int");
2779 const Id t_int2 = Name(TypeVector(t_int, 2), "int2");
2780 const Id t_int3 = Name(TypeVector(t_int, 3), "int3");
2781 const Id t_int4 = Name(TypeVector(t_int, 4), "int4");
2782
2783 const Id t_uint = Name(TypeInt(32, false), "uint");
2784 const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2");
2785 const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3");
2786 const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4");
2787
2788 const Id t_float = Name(TypeFloat(32), "float");
2789 const Id t_float2 = Name(TypeVector(t_float, 2), "float2");
2790 const Id t_float3 = Name(TypeVector(t_float, 3), "float3");
2791 const Id t_float4 = Name(TypeVector(t_float, 4), "float4");
2792
2793 const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool");
2794 const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float");
2795
2796 const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint");
2797
2798 const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool");
2799 const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int");
2800 const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4");
2801 const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint");
2802 const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3");
2803 const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4");
2804 const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float");
2805 const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2");
2806 const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3");
2807 const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4");
2808
2809 const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int");
2810
2811 const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float");
2812 const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
2813
2814 const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
2815 const Id t_cbuf_std140 = Decorate(
2816 Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"),
2817 spv::Decoration::ArrayStride, 16U);
2818 const Id t_cbuf_scalar = Decorate(
2819 Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"),
2820 spv::Decoration::ArrayStride, 4U);
2821 const Id t_cbuf_std140_struct = MemberDecorate(
2822 Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2823 const Id t_cbuf_scalar_struct = MemberDecorate(
2824 Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2825 const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
2826 const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
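The two constant-buffer array types above differ only in stride (16 bytes for the std140 float4 array, 4 bytes for the scalar float array), so the same byte offset resolves to different indices in each. A small self-contained illustration of that mapping:

    // std140 layout: pick the float4 element, then the component inside it.
    constexpr unsigned Std140Element(unsigned byte_offset) { return byte_offset / 16; }
    constexpr unsigned Std140Component(unsigned byte_offset) { return (byte_offset % 16) / 4; }
    // scalar layout: every float is addressable directly.
    constexpr unsigned ScalarElement(unsigned byte_offset) { return byte_offset / 4; }
    static_assert(Std140Element(0x24) == 2 && Std140Component(0x24) == 1);
    static_assert(ScalarElement(0x24) == 9);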
2827
2828 Id t_smem_uint{};
2829
2830 const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
2831 const Id t_gmem_array =
2832 Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
2833 const Id t_gmem_struct = MemberDecorate(
2834 Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2835 const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
2836
2837 const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
2838
2839 const Id v_float_zero = Constant(t_float, 0.0f);
2840 const Id v_float_one = Constant(t_float, 1.0f);
2841 const Id v_uint_zero = Constant(t_uint, 0);
2842
2843 // Nvidia uses these defaults for varyings (e.g. position and generic attributes)
2844 const Id v_varying_default =
2845 ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one);
2846
2847 const Id v_true = ConstantTrue(t_bool);
2848 const Id v_false = ConstantFalse(t_bool);
2849
2850 Id t_scalar_half{};
2851 Id t_half{};
2852
2853 Id out_vertex{};
2854 Id in_vertex{};
2855 std::map<u32, Id> registers;
2856 std::map<u32, Id> custom_variables;
2857 std::map<Tegra::Shader::Pred, Id> predicates;
2858 std::map<u32, Id> flow_variables;
2859 Id local_memory{};
2860 Id shared_memory{};
2861 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
2862 std::map<Attribute::Index, Id> input_attributes;
2863 std::unordered_map<u8, GenericVaryingDescription> output_attributes;
2864 std::map<u32, Id> constant_buffers;
2865 std::map<GlobalMemoryBase, Id> global_buffers;
2866 std::map<u32, TexelBuffer> uniform_texels;
2867 std::map<u32, SampledImage> sampled_images;
2868 std::map<u32, StorageImage> images;
2869
2870 std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
2871 Id instance_index{};
2872 Id vertex_index{};
2873 Id base_instance{};
2874 Id base_vertex{};
2875 Id frag_depth{};
2876 Id frag_coord{};
2877 Id front_facing{};
2878 Id point_coord{};
2879 Id tess_level_outer{};
2880 Id tess_level_inner{};
2881 Id tess_coord{};
2882 Id invocation_id{};
2883 Id workgroup_id{};
2884 Id local_invocation_id{};
2885 Id thread_id{};
2886 std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
2887
2888 VertexIndices in_indices;
2889 VertexIndices out_indices;
2890
2891 std::vector<Id> interfaces;
2892
2893 Id jmp_to{};
2894 Id ssy_flow_stack_top{};
2895 Id pbk_flow_stack_top{};
2896 Id ssy_flow_stack{};
2897 Id pbk_flow_stack{};
2898 Id continue_label{};
2899 std::map<u32, Id> labels;
2900
2901 bool conditional_branch_set{};
2902 bool inside_branch{};
2903};
2904
2905class ExprDecompiler {
2906public:
2907 explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
2908
2909 Id operator()(const ExprAnd& expr) {
2910 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
2911 const Id op1 = Visit(expr.operand1);
2912 const Id op2 = Visit(expr.operand2);
2913 return decomp.OpLogicalAnd(type_def, op1, op2);
2914 }
2915
2916 Id operator()(const ExprOr& expr) {
2917 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
2918 const Id op1 = Visit(expr.operand1);
2919 const Id op2 = Visit(expr.operand2);
2920 return decomp.OpLogicalOr(type_def, op1, op2);
2921 }
2922
2923 Id operator()(const ExprNot& expr) {
2924 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
2925 const Id op1 = Visit(expr.operand1);
2926 return decomp.OpLogicalNot(type_def, op1);
2927 }
2928
2929 Id operator()(const ExprPredicate& expr) {
2930 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
2931 return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred));
2932 }
2933
2934 Id operator()(const ExprCondCode& expr) {
2935 return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
2936 }
2937
2938 Id operator()(const ExprVar& expr) {
2939 return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index));
2940 }
2941
2942 Id operator()(const ExprBoolean& expr) {
2943 return expr.value ? decomp.v_true : decomp.v_false;
2944 }
2945
2946 Id operator()(const ExprGprEqual& expr) {
2947 const Id target = decomp.Constant(decomp.t_uint, expr.value);
2948 Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr));
2949 gpr = decomp.OpBitcast(decomp.t_uint, gpr);
2950 return decomp.OpIEqual(decomp.t_bool, gpr, target);
2951 }
2952
2953 Id Visit(const Expr& node) {
2954 return std::visit(*this, *node);
2955 }
2956
2957private:
2958 SPIRVDecompiler& decomp;
2959};
2960
2961class ASTDecompiler {
2962public:
2963 explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
2964
2965 void operator()(const ASTProgram& ast) {
2966 ASTNode current = ast.nodes.GetFirst();
2967 while (current) {
2968 Visit(current);
2969 current = current->GetNext();
2970 }
2971 }
2972
2973 void operator()(const ASTIfThen& ast) {
2974 ExprDecompiler expr_parser{decomp};
2975 const Id condition = expr_parser.Visit(ast.condition);
2976 const Id then_label = decomp.OpLabel();
2977 const Id endif_label = decomp.OpLabel();
2978 decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
2979 decomp.OpBranchConditional(condition, then_label, endif_label);
2980 decomp.AddLabel(then_label);
2981 ASTNode current = ast.nodes.GetFirst();
2982 while (current) {
2983 Visit(current);
2984 current = current->GetNext();
2985 }
2986 decomp.OpBranch(endif_label);
2987 decomp.AddLabel(endif_label);
2988 }
2989
2990 void operator()([[maybe_unused]] const ASTIfElse& ast) {
2991 UNREACHABLE();
2992 }
2993
2994 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
2995 UNREACHABLE();
2996 }
2997
2998 void operator()(const ASTBlockDecoded& ast) {
2999 decomp.VisitBasicBlock(ast.nodes);
3000 }
3001
3002 void operator()(const ASTVarSet& ast) {
3003 ExprDecompiler expr_parser{decomp};
3004 const Id condition = expr_parser.Visit(ast.condition);
3005 decomp.OpStore(decomp.flow_variables.at(ast.index), condition);
3006 }
3007
3008 void operator()([[maybe_unused]] const ASTLabel& ast) {
3009 // Do nothing
3010 }
3011
3012 void operator()([[maybe_unused]] const ASTGoto& ast) {
3013 UNREACHABLE();
3014 }
3015
3016 void operator()(const ASTDoWhile& ast) {
3017 const Id loop_label = decomp.OpLabel();
3018 const Id endloop_label = decomp.OpLabel();
3019 const Id loop_start_block = decomp.OpLabel();
3020 const Id loop_continue_block = decomp.OpLabel();
3021 current_loop_exit = endloop_label;
3022 decomp.OpBranch(loop_label);
3023 decomp.AddLabel(loop_label);
3024 decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone);
3025 decomp.OpBranch(loop_start_block);
3026 decomp.AddLabel(loop_start_block);
3027 ASTNode current = ast.nodes.GetFirst();
3028 while (current) {
3029 Visit(current);
3030 current = current->GetNext();
3031 }
3032 decomp.OpBranch(loop_continue_block);
3033 decomp.AddLabel(loop_continue_block);
3034 ExprDecompiler expr_parser{decomp};
3035 const Id condition = expr_parser.Visit(ast.condition);
3036 decomp.OpBranchConditional(condition, loop_label, endloop_label);
3037 decomp.AddLabel(endloop_label);
3038 }
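The block order emitted by the do-while handler above follows SPIR-V structured control flow; the resulting module section looks roughly like this (label names are illustrative):

    //   OpBranch %loop
    //   %loop = OpLabel
    //   OpLoopMerge %endloop %continue None
    //   OpBranch %body
    //   %body = OpLabel
    //   ... loop body ...
    //   OpBranch %continue
    //   %continue = OpLabel
    //   %cond = <condition>
    //   OpBranchConditional %cond %loop %endloop
    //   %endloop = OpLabel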
3039
3040 void operator()(const ASTReturn& ast) {
3041 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
3042 ExprDecompiler expr_parser{decomp};
3043 const Id condition = expr_parser.Visit(ast.condition);
3044 const Id then_label = decomp.OpLabel();
3045 const Id endif_label = decomp.OpLabel();
3046 decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
3047 decomp.OpBranchConditional(condition, then_label, endif_label);
3048 decomp.AddLabel(then_label);
3049 if (ast.kills) {
3050 decomp.OpKill();
3051 } else {
3052 decomp.PreExit();
3053 decomp.OpReturn();
3054 }
3055 decomp.AddLabel(endif_label);
3056 } else {
3057 const Id next_block = decomp.OpLabel();
3058 decomp.OpBranch(next_block);
3059 decomp.AddLabel(next_block);
3060 if (ast.kills) {
3061 decomp.OpKill();
3062 } else {
3063 decomp.PreExit();
3064 decomp.OpReturn();
3065 }
3066 decomp.AddLabel(decomp.OpLabel());
3067 }
3068 }
3069
3070 void operator()(const ASTBreak& ast) {
3071 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
3072 ExprDecompiler expr_parser{decomp};
3073 const Id condition = expr_parser.Visit(ast.condition);
3074 const Id then_label = decomp.OpLabel();
3075 const Id endif_label = decomp.OpLabel();
3076 decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
3077 decomp.OpBranchConditional(condition, then_label, endif_label);
3078 decomp.AddLabel(then_label);
3079 decomp.OpBranch(current_loop_exit);
3080 decomp.AddLabel(endif_label);
3081 } else {
3082 const Id next_block = decomp.OpLabel();
3083 decomp.OpBranch(next_block);
3084 decomp.AddLabel(next_block);
3085 decomp.OpBranch(current_loop_exit);
3086 decomp.AddLabel(decomp.OpLabel());
3087 }
3088 }
3089
3090 void Visit(const ASTNode& node) {
3091 std::visit(*this, *node->GetInnerData());
3092 }
3093
3094private:
3095 SPIRVDecompiler& decomp;
3096 Id current_loop_exit{};
3097};
3098
3099void SPIRVDecompiler::DecompileAST() {
3100 const u32 num_flow_variables = ir.GetASTNumVariables();
3101 for (u32 i = 0; i < num_flow_variables; i++) {
3102 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
3103 Name(id, fmt::format("flow_var_{}", i));
3104 flow_variables.emplace(i, AddGlobalVariable(id));
3105 }
3106
3107 DefinePrologue();
3108
3109 const ASTNode program = ir.GetASTProgram();
3110 ASTDecompiler decompiler{*this};
3111 decompiler.Visit(program);
3112
3113 const Id next_block = OpLabel();
3114 OpBranch(next_block);
3115 AddLabel(next_block);
3116}
3117
3118} // Anonymous namespace
3119
3120ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3121 ShaderEntries entries;
3122 for (const auto& cbuf : ir.GetConstantBuffers()) {
3123 entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
3124 }
3125 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
3126 entries.global_buffers.emplace_back(GlobalBufferEntry{
3127 .cbuf_index = base.cbuf_index,
3128 .cbuf_offset = base.cbuf_offset,
3129 .is_written = usage.is_written,
3130 });
3131 }
3132 for (const auto& sampler : ir.GetSamplers()) {
3133 if (sampler.is_buffer) {
3134 entries.uniform_texels.emplace_back(sampler);
3135 } else {
3136 entries.samplers.emplace_back(sampler);
3137 }
3138 }
3139 for (const auto& image : ir.GetImages()) {
3140 if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
3141 entries.storage_texels.emplace_back(image);
3142 } else {
3143 entries.images.emplace_back(image);
3144 }
3145 }
3146 for (const auto& attribute : ir.GetInputAttributes()) {
3147 if (IsGenericAttribute(attribute)) {
3148 entries.attributes.insert(GetGenericAttributeLocation(attribute));
3149 }
3150 }
3151 for (const auto& buffer : entries.const_buffers) {
3152 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
3153 }
3154 entries.clip_distances = ir.GetClipDistances();
3155 entries.shader_length = ir.GetLength();
3156 entries.uses_warps = ir.UsesWarps();
3157 return entries;
3158}
3159
3160std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
3161 ShaderType stage, const VideoCommon::Shader::Registry& registry,
3162 const Specialization& specialization) {
3163 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
3164}
3165
3166} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
deleted file mode 100644
index 5d94132a5..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ /dev/null
@@ -1,99 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <set>
9#include <vector>
10
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/shader_type.h"
14#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace Vulkan {
18
19class Device;
20
21using Maxwell = Tegra::Engines::Maxwell3D::Regs;
22using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
25using ImageEntry = VideoCommon::Shader::ImageEntry;
26
27constexpr u32 DESCRIPTOR_SET = 0;
28
29class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
30public:
31 explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_)
32 : ConstBuffer{entry_}, index{index_} {}
33
34 constexpr u32 GetIndex() const {
35 return index;
36 }
37
38private:
39 u32 index{};
40};
41
42struct GlobalBufferEntry {
43 u32 cbuf_index{};
44 u32 cbuf_offset{};
45 bool is_written{};
46};
47
48struct ShaderEntries {
49 u32 NumBindings() const {
50 return static_cast<u32>(const_buffers.size() + global_buffers.size() +
51 uniform_texels.size() + samplers.size() + storage_texels.size() +
52 images.size());
53 }
54
55 std::vector<ConstBufferEntry> const_buffers;
56 std::vector<GlobalBufferEntry> global_buffers;
57 std::vector<UniformTexelEntry> uniform_texels;
58 std::vector<SamplerEntry> samplers;
59 std::vector<StorageTexelEntry> storage_texels;
60 std::vector<ImageEntry> images;
61 std::set<u32> attributes;
62 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
63 std::size_t shader_length{};
64 u32 enabled_uniform_buffers{};
65 bool uses_warps{};
66};
67
68struct Specialization final {
69 u32 base_binding{};
70
71 // Compute specific
72 std::array<u32, 3> workgroup_size{};
73 u32 shared_memory_size{};
74
75 // Graphics specific
76 std::optional<float> point_size;
77 std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
78 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
79 bool ndc_minus_one_to_one{};
80 bool early_fragment_tests{};
81 float alpha_test_ref{};
82 Maxwell::ComparisonOp alpha_test_func{};
83};
84// Old gcc versions don't consider this trivially copyable.
85// static_assert(std::is_trivially_copyable_v<Specialization>);
86
87struct SPIRVShader {
88 std::vector<u32> code;
89 ShaderEntries entries;
90};
91
92ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
93
94std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
95 Tegra::Engines::ShaderType stage,
96 const VideoCommon::Shader::Registry& registry,
97 const Specialization& specialization);
98
99} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 0412b5234..555b12ed7 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
91 .flags = 0, 91 .flags = 0,
92 .size = STREAM_BUFFER_SIZE, 92 .size = STREAM_BUFFER_SIZE,
93 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | 93 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
94 VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 94 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
95 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 95 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
96 .queueFamilyIndexCount = 0, 96 .queueFamilyIndexCount = 0,
97 .pQueueFamilyIndices = nullptr, 97 .pQueueFamilyIndices = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 956f86845..e3b7dd61c 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags;
29 29
30Flags MakeInvalidationFlags() { 30Flags MakeInvalidationFlags() {
31 static constexpr int INVALIDATION_FLAGS[]{ 31 static constexpr int INVALIDATION_FLAGS[]{
32 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, 32 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
33 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, 33 StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
34 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, 34 DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
35 VertexBuffers, VertexInput,
35 }; 36 };
36 Flags flags{}; 37 Flags flags{};
37 for (const int flag : INVALIDATION_FLAGS) { 38 for (const int flag : INVALIDATION_FLAGS) {
@@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() {
40 for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { 41 for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
41 flags[index] = true; 42 flags[index] = true;
42 } 43 }
44 for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) {
45 flags[index] = true;
46 }
47 for (int index = VertexBinding0; index <= VertexBinding31; ++index) {
48 flags[index] = true;
49 }
43 return flags; 50 return flags;
44} 51}
45 52
@@ -79,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) {
79 table[OFF(stencil_back_func_mask)] = StencilProperties; 86 table[OFF(stencil_back_func_mask)] = StencilProperties;
80} 87}
81 88
89void SetupDirtyLineWidth(Tables& tables) {
90 tables[0][OFF(line_width_smooth)] = LineWidth;
91 tables[0][OFF(line_width_aliased)] = LineWidth;
92}
93
82void SetupDirtyCullMode(Tables& tables) { 94void SetupDirtyCullMode(Tables& tables) {
83 auto& table = tables[0]; 95 auto& table = tables[0];
84 table[OFF(cull_face)] = CullMode; 96 table[OFF(cull_face)] = CullMode;
@@ -134,31 +146,38 @@ void SetupDirtyBlending(Tables& tables) {
134 FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); 146 FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
135} 147}
136 148
137void SetupDirtyInstanceDivisors(Tables& tables) { 149void SetupDirtyViewportSwizzles(Tables& tables) {
138 static constexpr size_t divisor_offset = 3; 150 static constexpr size_t swizzle_offset = 6;
139 for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { 151 for (size_t index = 0; index < Regs::NumViewports; ++index) {
140 tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; 152 tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
141 tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = 153 ViewportSwizzles;
142 InstanceDivisors;
143 } 154 }
144} 155}
145 156
146void SetupDirtyVertexAttributes(Tables& tables) { 157void SetupDirtyVertexAttributes(Tables& tables) {
147 FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); 158 for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
159 const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
160 FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
161 }
162 FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
148} 163}
149 164
150void SetupDirtyViewportSwizzles(Tables& tables) { 165void SetupDirtyVertexBindings(Tables& tables) {
151 static constexpr size_t swizzle_offset = 6; 166 // Do NOT include stride here, it's implicit in VertexBuffer
152 for (size_t index = 0; index < Regs::NumViewports; ++index) { 167 static constexpr size_t divisor_offset = 3;
153 tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = 168 for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
154 ViewportSwizzles; 169 const u8 flag = static_cast<u8>(VertexBinding0 + i);
170 tables[0][OFF(instanced_arrays) + i] = VertexInput;
171 tables[1][OFF(instanced_arrays) + i] = flag;
172 tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput;
173 tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag;
155 } 174 }
156} 175}
157} // Anonymous namespace 176} // Anonymous namespace
158 177
159StateTracker::StateTracker(Tegra::GPU& gpu) 178StateTracker::StateTracker(Tegra::GPU& gpu)
160 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { 179 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
161 auto& tables = gpu.Maxwell3D().dirty.tables; 180 auto& tables{gpu.Maxwell3D().dirty.tables};
162 SetupDirtyFlags(tables); 181 SetupDirtyFlags(tables);
163 SetupDirtyViewports(tables); 182 SetupDirtyViewports(tables);
164 SetupDirtyScissors(tables); 183 SetupDirtyScissors(tables);
@@ -166,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
166 SetupDirtyBlendConstants(tables); 185 SetupDirtyBlendConstants(tables);
167 SetupDirtyDepthBounds(tables); 186 SetupDirtyDepthBounds(tables);
168 SetupDirtyStencilProperties(tables); 187 SetupDirtyStencilProperties(tables);
188 SetupDirtyLineWidth(tables);
169 SetupDirtyCullMode(tables); 189 SetupDirtyCullMode(tables);
170 SetupDirtyDepthBoundsEnable(tables); 190 SetupDirtyDepthBoundsEnable(tables);
171 SetupDirtyDepthTestEnable(tables); 191 SetupDirtyDepthTestEnable(tables);
@@ -175,9 +195,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
175 SetupDirtyStencilOp(tables); 195 SetupDirtyStencilOp(tables);
176 SetupDirtyStencilTestEnable(tables); 196 SetupDirtyStencilTestEnable(tables);
177 SetupDirtyBlending(tables); 197 SetupDirtyBlending(tables);
178 SetupDirtyInstanceDivisors(tables);
179 SetupDirtyVertexAttributes(tables);
180 SetupDirtyViewportSwizzles(tables); 198 SetupDirtyViewportSwizzles(tables);
199 SetupDirtyVertexAttributes(tables);
200 SetupDirtyVertexBindings(tables);
181} 201}
182 202
183} // namespace Vulkan 203} // namespace Vulkan
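The hunk above replaces the coarse InstanceDivisors/VertexAttributes flags with one dirty bit per attribute and per binding, plus a combined VertexInput bit. A hedged sketch of how a consumer might walk the per-index bits; the flag names come from the header diff below, while the loop itself is illustrative:

    for (std::size_t index = 0; index < 32; ++index) {
        if (!flags[Dirty::VertexAttribute0 + index]) {
            continue;
        }
        flags[Dirty::VertexAttribute0 + index] = false;
        // Re-translate vertex_attrib_format[index] for this attribute only.
    }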
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 84e918a71..5f78f6950 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -19,12 +19,19 @@ namespace Dirty {
19enum : u8 { 19enum : u8 {
20 First = VideoCommon::Dirty::LastCommonEntry, 20 First = VideoCommon::Dirty::LastCommonEntry,
21 21
22 VertexInput,
23 VertexAttribute0,
24 VertexAttribute31 = VertexAttribute0 + 31,
25 VertexBinding0,
26 VertexBinding31 = VertexBinding0 + 31,
27
22 Viewports, 28 Viewports,
23 Scissors, 29 Scissors,
24 DepthBias, 30 DepthBias,
25 BlendConstants, 31 BlendConstants,
26 DepthBounds, 32 DepthBounds,
27 StencilProperties, 33 StencilProperties,
34 LineWidth,
28 35
29 CullMode, 36 CullMode,
30 DepthBoundsEnable, 37 DepthBoundsEnable,
@@ -36,11 +43,9 @@ enum : u8 {
36 StencilTestEnable, 43 StencilTestEnable,
37 44
38 Blending, 45 Blending,
39 InstanceDivisors,
40 VertexAttributes,
41 ViewportSwizzles, 46 ViewportSwizzles,
42 47
43 Last 48 Last,
44}; 49};
45static_assert(Last <= std::numeric_limits<u8>::max()); 50static_assert(Last <= std::numeric_limits<u8>::max());
46 51
@@ -89,6 +94,10 @@ public:
89 return Exchange(Dirty::StencilProperties, false); 94 return Exchange(Dirty::StencilProperties, false);
90 } 95 }
91 96
97 bool TouchLineWidth() const {
98 return Exchange(Dirty::LineWidth, false);
99 }
100
92 bool TouchCullMode() { 101 bool TouchCullMode() {
93 return Exchange(Dirty::CullMode, false); 102 return Exchange(Dirty::CullMode, false);
94 } 103 }
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index dfd5c65ba..d990eefba 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -65,6 +65,9 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul
65VKSwapchain::~VKSwapchain() = default; 65VKSwapchain::~VKSwapchain() = default;
66 66
67void VKSwapchain::Create(u32 width, u32 height, bool srgb) { 67void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
68 is_outdated = false;
69 is_suboptimal = false;
70
68 const auto physical_device = device.GetPhysical(); 71 const auto physical_device = device.GetPhysical();
69 const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; 72 const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
70 if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { 73 if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
@@ -82,21 +85,31 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
82 resource_ticks.resize(image_count); 85 resource_ticks.resize(image_count);
83} 86}
84 87
85bool VKSwapchain::AcquireNextImage() { 88void VKSwapchain::AcquireNextImage() {
86 const VkResult result = 89 const VkResult result = device.GetLogical().AcquireNextImageKHR(
87 device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), 90 *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
88 *present_semaphores[frame_index], {}, &image_index); 91 VK_NULL_HANDLE, &image_index);
89 92 switch (result) {
93 case VK_SUCCESS:
94 break;
95 case VK_SUBOPTIMAL_KHR:
96 is_suboptimal = true;
97 break;
98 case VK_ERROR_OUT_OF_DATE_KHR:
99 is_outdated = true;
100 break;
101 default:
102 LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
103 break;
104 }
90 scheduler.Wait(resource_ticks[image_index]); 105 scheduler.Wait(resource_ticks[image_index]);
91 return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; 106 resource_ticks[image_index] = scheduler.CurrentTick();
92} 107}
93 108
94bool VKSwapchain::Present(VkSemaphore render_semaphore) { 109void VKSwapchain::Present(VkSemaphore render_semaphore) {
95 const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; 110 const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
96 const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; 111 const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
97 const auto present_queue{device.GetPresentQueue()}; 112 const auto present_queue{device.GetPresentQueue()};
98 bool recreated = false;
99
100 const VkPresentInfoKHR present_info{ 113 const VkPresentInfoKHR present_info{
101 .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, 114 .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
102 .pNext = nullptr, 115 .pNext = nullptr,
@@ -107,7 +120,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
107 .pImageIndices = &image_index, 120 .pImageIndices = &image_index,
108 .pResults = nullptr, 121 .pResults = nullptr,
109 }; 122 };
110
111 switch (const VkResult result = present_queue.Present(present_info)) { 123 switch (const VkResult result = present_queue.Present(present_info)) {
112 case VK_SUCCESS: 124 case VK_SUCCESS:
113 break; 125 break;
@@ -115,24 +127,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
115 LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); 127 LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
116 break; 128 break;
117 case VK_ERROR_OUT_OF_DATE_KHR: 129 case VK_ERROR_OUT_OF_DATE_KHR:
118 if (current_width > 0 && current_height > 0) { 130 is_outdated = true;
119 Create(current_width, current_height, current_srgb);
120 recreated = true;
121 }
122 break; 131 break;
123 default: 132 default:
124 LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); 133 LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
125 break; 134 break;
126 } 135 }
127 136 ++frame_index;
128 resource_ticks[image_index] = scheduler.CurrentTick(); 137 if (frame_index >= image_count) {
129 frame_index = (frame_index + 1) % static_cast<u32>(image_count); 138 frame_index = 0;
130 return recreated; 139 }
131}
132
133bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
134 // TODO(Rodrigo): Handle framebuffer pixel format changes
135 return framebuffer.width != current_width || framebuffer.height != current_height;
136} 140}
137 141
138void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, 142void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width,
@@ -148,7 +152,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
148 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { 152 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
149 requested_image_count = capabilities.maxImageCount; 153 requested_image_count = capabilities.maxImageCount;
150 } 154 }
151
152 VkSwapchainCreateInfoKHR swapchain_ci{ 155 VkSwapchainCreateInfoKHR swapchain_ci{
153 .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, 156 .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
154 .pNext = nullptr, 157 .pNext = nullptr,
@@ -169,7 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
169 .clipped = VK_FALSE, 172 .clipped = VK_FALSE,
170 .oldSwapchain = nullptr, 173 .oldSwapchain = nullptr,
171 }; 174 };
172
173 const u32 graphics_family{device.GetGraphicsFamily()}; 175 const u32 graphics_family{device.GetGraphicsFamily()};
174 const u32 present_family{device.GetPresentFamily()}; 176 const u32 present_family{device.GetPresentFamily()};
175 const std::array<u32, 2> queue_indices{graphics_family, present_family}; 177 const std::array<u32, 2> queue_indices{graphics_family, present_family};
@@ -178,7 +180,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
178 swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); 180 swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
179 swapchain_ci.pQueueFamilyIndices = queue_indices.data(); 181 swapchain_ci.pQueueFamilyIndices = queue_indices.data();
180 } 182 }
181
182 // Request the size again to reduce the possibility of a TOCTOU race condition. 183 // Request the size again to reduce the possibility of a TOCTOU race condition.
183 const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); 184 const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
184 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); 185 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -186,8 +187,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
186 swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); 187 swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
187 188
188 extent = swapchain_ci.imageExtent; 189 extent = swapchain_ci.imageExtent;
189 current_width = extent.width;
190 current_height = extent.height;
191 current_srgb = srgb; 190 current_srgb = srgb;
192 191
193 images = swapchain.GetImages(); 192 images = swapchain.GetImages();
@@ -197,8 +196,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
197 196
198void VKSwapchain::CreateSemaphores() { 197void VKSwapchain::CreateSemaphores() {
199 present_semaphores.resize(image_count); 198 present_semaphores.resize(image_count);
200 std::generate(present_semaphores.begin(), present_semaphores.end(), 199 std::ranges::generate(present_semaphores,
201 [this] { return device.GetLogical().CreateSemaphore(); }); 200 [this] { return device.GetLogical().CreateSemaphore(); });
202} 201}
203 202
204void VKSwapchain::CreateImageViews() { 203void VKSwapchain::CreateImageViews() {
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index adc8d27cf..35c2cdc14 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -28,14 +28,25 @@ public:
28 void Create(u32 width, u32 height, bool srgb); 28 void Create(u32 width, u32 height, bool srgb);
29 29
30 /// Acquires the next image in the swapchain, waits as needed. 30 /// Acquires the next image in the swapchain, waits as needed.
31 bool AcquireNextImage(); 31 void AcquireNextImage();
32 32
 33 /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be 33 /// Presents the rendered image to the swapchain.
 34 /// recreated. Takes responsibility for the ownership of the fence. 34 void Present(VkSemaphore render_semaphore);
35 bool Present(VkSemaphore render_semaphore);
36 35
37 /// Returns true when the framebuffer layout has changed. 36 /// Returns true when the color space has changed.
38 bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; 37 bool HasColorSpaceChanged(bool is_srgb) const {
38 return current_srgb != is_srgb;
39 }
40
41 /// Returns true when the swapchain is outdated.
42 bool IsOutDated() const {
43 return is_outdated;
44 }
45
46 /// Returns true when the swapchain is suboptimal.
47 bool IsSubOptimal() const {
48 return is_suboptimal;
49 }
39 50
40 VkExtent2D GetSize() const { 51 VkExtent2D GetSize() const {
41 return extent; 52 return extent;
@@ -61,10 +72,6 @@ public:
61 return image_format; 72 return image_format;
62 } 73 }
63 74
64 bool GetSrgbState() const {
65 return current_srgb;
66 }
67
68private: 75private:
69 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, 76 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
70 bool srgb); 77 bool srgb);
@@ -92,9 +99,9 @@ private:
92 VkFormat image_format{}; 99 VkFormat image_format{};
93 VkExtent2D extent{}; 100 VkExtent2D extent{};
94 101
95 u32 current_width{};
96 u32 current_height{};
97 bool current_srgb{}; 102 bool current_srgb{};
103 bool is_outdated{};
104 bool is_suboptimal{};
98}; 105};
99 106
100} // namespace Vulkan 107} // namespace Vulkan
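With the new interface above, AcquireNextImage and Present no longer report recreation through return values; callers are expected to poll the swapchain state instead. A sketch of that flow under the new API (the surrounding frame logic and variable names are assumptions):

    if (swapchain.IsOutDated() || swapchain.HasColorSpaceChanged(is_srgb)) {
        swapchain.Create(framebuffer.width, framebuffer.height, is_srgb);
    }
    swapchain.AcquireNextImage();
    // ... record and submit rendering work ...
    swapchain.Present(render_semaphore);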
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 88ccf96f5..8e029bcb3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -15,6 +15,7 @@
15#include "video_core/renderer_vulkan/maxwell_to_vk.h" 15#include "video_core/renderer_vulkan/maxwell_to_vk.h"
16#include "video_core/renderer_vulkan/vk_compute_pass.h" 16#include "video_core/renderer_vulkan/vk_compute_pass.h"
17#include "video_core/renderer_vulkan/vk_rasterizer.h" 17#include "video_core/renderer_vulkan/vk_rasterizer.h"
18#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
18#include "video_core/renderer_vulkan/vk_scheduler.h" 19#include "video_core/renderer_vulkan/vk_scheduler.h"
19#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 20#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
20#include "video_core/renderer_vulkan/vk_texture_cache.h" 21#include "video_core/renderer_vulkan/vk_texture_cache.h"
@@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange;
34using VideoCore::Surface::IsPixelFormatASTC; 35using VideoCore::Surface::IsPixelFormatASTC;
35 36
36namespace { 37namespace {
37
38constexpr std::array ATTACHMENT_REFERENCES{
39 VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
40 VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
41 VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
42 VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
43 VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
44 VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
45 VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
46 VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
47 VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
48};
49
50constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { 38constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
51 if (color == std::array<float, 4>{0, 0, 0, 0}) { 39 if (color == std::array<float, 4>{0, 0, 0, 0}) {
52 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; 40 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
@@ -174,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
174 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); 162 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
175} 163}
176 164
177[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) {
178 if (info.type != ImageType::Buffer) {
179 return vk::Buffer{};
180 }
181 const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format);
182 return device.GetLogical().CreateBuffer(VkBufferCreateInfo{
183 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
184 .pNext = nullptr,
185 .flags = 0,
186 .size = info.size.width * bytes_per_block,
187 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
188 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
189 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
190 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
191 .queueFamilyIndexCount = 0,
192 .pQueueFamilyIndices = nullptr,
193 });
194}
195
196[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { 165[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
197 switch (VideoCore::Surface::GetFormatType(format)) { 166 switch (VideoCore::Surface::GetFormatType(format)) {
198 case VideoCore::Surface::SurfaceType::ColorTexture: 167 case VideoCore::Surface::SurfaceType::ColorTexture:
@@ -226,23 +195,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
226 } 195 }
227} 196}
228 197
229[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
230 const ImageView* image_view) {
231 using MaxwellToVK::SurfaceFormat;
232 const PixelFormat pixel_format = image_view->format;
233 return VkAttachmentDescription{
234 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
235 .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
236 .samples = image_view->Samples(),
237 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
238 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
239 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
240 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
241 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
242 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
243 };
244}
245
246[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { 198[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
247 switch (swizzle) { 199 switch (swizzle) {
248 case SwizzleSource::Zero: 200 case SwizzleSource::Zero:
@@ -263,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
263 return VK_COMPONENT_SWIZZLE_ZERO; 215 return VK_COMPONENT_SWIZZLE_ZERO;
264} 216}
265 217
218[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) {
219 switch (type) {
220 case Shader::TextureType::Color1D:
221 return VK_IMAGE_VIEW_TYPE_1D;
222 case Shader::TextureType::Color2D:
223 return VK_IMAGE_VIEW_TYPE_2D;
224 case Shader::TextureType::ColorCube:
225 return VK_IMAGE_VIEW_TYPE_CUBE;
226 case Shader::TextureType::Color3D:
227 return VK_IMAGE_VIEW_TYPE_3D;
228 case Shader::TextureType::ColorArray1D:
229 return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
230 case Shader::TextureType::ColorArray2D:
231 return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
232 case Shader::TextureType::ColorArrayCube:
233 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
234 case Shader::TextureType::Buffer:
235 UNREACHABLE_MSG("Texture buffers can't be image views");
236 return VK_IMAGE_VIEW_TYPE_1D;
237 }
238 UNREACHABLE_MSG("Invalid image view type={}", type);
239 return VK_IMAGE_VIEW_TYPE_2D;
240}
241
266[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { 242[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
267 switch (type) { 243 switch (type) {
268 case VideoCommon::ImageViewType::e1D: 244 case VideoCommon::ImageViewType::e1D:
@@ -280,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
280 case VideoCommon::ImageViewType::CubeArray: 256 case VideoCommon::ImageViewType::CubeArray:
281 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; 257 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
282 case VideoCommon::ImageViewType::Rect: 258 case VideoCommon::ImageViewType::Rect:
283 LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); 259 UNIMPLEMENTED_MSG("Rect image view");
284 return VK_IMAGE_VIEW_TYPE_2D; 260 return VK_IMAGE_VIEW_TYPE_2D;
285 case VideoCommon::ImageViewType::Buffer: 261 case VideoCommon::ImageViewType::Buffer:
286 UNREACHABLE_MSG("Texture buffers can't be image views"); 262 UNREACHABLE_MSG("Texture buffers can't be image views");
@@ -327,7 +303,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
327 }; 303 };
328} 304}
329 305
330[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( 306[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
331 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { 307 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
332 std::vector<VkBufferCopy> result(copies.size()); 308 std::vector<VkBufferCopy> result(copies.size());
333 std::ranges::transform( 309 std::ranges::transform(
@@ -587,6 +563,28 @@ struct RangedBarrierRange {
587 } 563 }
588}; 564};
589 565
566[[nodiscard]] VkFormat Format(Shader::ImageFormat format) {
567 switch (format) {
568 case Shader::ImageFormat::Typeless:
569 break;
570 case Shader::ImageFormat::R8_SINT:
571 return VK_FORMAT_R8_SINT;
572 case Shader::ImageFormat::R8_UINT:
573 return VK_FORMAT_R8_UINT;
574 case Shader::ImageFormat::R16_UINT:
575 return VK_FORMAT_R16_UINT;
576 case Shader::ImageFormat::R16_SINT:
577 return VK_FORMAT_R16_SINT;
578 case Shader::ImageFormat::R32_UINT:
579 return VK_FORMAT_R32_UINT;
580 case Shader::ImageFormat::R32G32_UINT:
581 return VK_FORMAT_R32G32_UINT;
582 case Shader::ImageFormat::R32G32B32A32_UINT:
583 return VK_FORMAT_R32G32B32A32_UINT;
584 }
585 UNREACHABLE_MSG("Invalid image format={}", format);
586 return VK_FORMAT_R32_UINT;
587}
590} // Anonymous namespace 588} // Anonymous namespace
591 589
592void TextureCacheRuntime::Finish() { 590void TextureCacheRuntime::Finish() {
@@ -625,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
625 return; 623 return;
626 } 624 }
627 } 625 }
628 ASSERT(src.ImageFormat() == dst.ImageFormat()); 626 ASSERT(src.format == dst.format);
629 ASSERT(!(is_dst_msaa && !is_src_msaa)); 627 ASSERT(!(is_dst_msaa && !is_src_msaa));
630 ASSERT(operation == Fermi2D::Operation::SrcCopy); 628 ASSERT(operation == Fermi2D::Operation::SrcCopy);
631 629
@@ -842,13 +840,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
842Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, 840Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
843 VAddr cpu_addr_) 841 VAddr cpu_addr_)
844 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, 842 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
845 image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), 843 image(MakeImage(runtime.device, info)),
844 commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
846 aspect_mask(ImageAspectMask(info.format)) { 845 aspect_mask(ImageAspectMask(info.format)) {
847 if (image) {
848 commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
849 } else {
850 commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
851 }
852 if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { 846 if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
853 if (Settings::values.accelerate_astc.GetValue()) { 847 if (Settings::values.accelerate_astc.GetValue()) {
854 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; 848 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
@@ -857,11 +851,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
857 } 851 }
858 } 852 }
859 if (runtime.device.HasDebuggingToolAttached()) { 853 if (runtime.device.HasDebuggingToolAttached()) {
860 if (image) { 854 image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
861 image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
862 } else {
863 buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
864 }
865 } 855 }
866 static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ 856 static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
867 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, 857 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
@@ -913,19 +903,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
913 }); 903 });
914} 904}
915 905
916void Image::UploadMemory(const StagingBufferRef& map,
917 std::span<const VideoCommon::BufferCopy> copies) {
918 // TODO: Move this to another API
919 scheduler->RequestOutsideRenderPassOperationContext();
920 std::vector vk_copies = TransformBufferCopies(copies, map.offset);
921 const VkBuffer src_buffer = map.buffer;
922 const VkBuffer dst_buffer = *buffer;
923 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
924 // TODO: Barriers
925 cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
926 });
927}
928
929void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { 906void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
930 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); 907 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
931 scheduler->RequestOutsideRenderPassOperationContext(); 908 scheduler->RequestOutsideRenderPassOperationContext();
@@ -984,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
984ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, 961ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
985 ImageId image_id_, Image& image) 962 ImageId image_id_, Image& image)
986 : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, 963 : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
987 image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( 964 image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} {
988 image.info.num_samples)} { 965 using Shader::TextureType;
966
989 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); 967 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
990 std::array<SwizzleSource, 4> swizzle{ 968 std::array<SwizzleSource, 4> swizzle{
991 SwizzleSource::R, 969 SwizzleSource::R,
@@ -1023,57 +1001,54 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
1023 }, 1001 },
1024 .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), 1002 .subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
1025 }; 1003 };
1026 const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { 1004 const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) {
1027 VkImageViewCreateInfo ci{create_info}; 1005 VkImageViewCreateInfo ci{create_info};
1028 ci.viewType = ImageViewType(view_type); 1006 ci.viewType = ImageViewType(tex_type);
1029 if (num_layers) { 1007 if (num_layers) {
1030 ci.subresourceRange.layerCount = *num_layers; 1008 ci.subresourceRange.layerCount = *num_layers;
1031 } 1009 }
1032 vk::ImageView handle = device->GetLogical().CreateImageView(ci); 1010 vk::ImageView handle = device->GetLogical().CreateImageView(ci);
1033 if (device->HasDebuggingToolAttached()) { 1011 if (device->HasDebuggingToolAttached()) {
1034 handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); 1012 handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
1035 } 1013 }
1036 image_views[static_cast<size_t>(view_type)] = std::move(handle); 1014 image_views[static_cast<size_t>(tex_type)] = std::move(handle);
1037 }; 1015 };
1038 switch (info.type) { 1016 switch (info.type) {
1039 case VideoCommon::ImageViewType::e1D: 1017 case VideoCommon::ImageViewType::e1D:
1040 case VideoCommon::ImageViewType::e1DArray: 1018 case VideoCommon::ImageViewType::e1DArray:
1041 create(VideoCommon::ImageViewType::e1D, 1); 1019 create(TextureType::Color1D, 1);
1042 create(VideoCommon::ImageViewType::e1DArray, std::nullopt); 1020 create(TextureType::ColorArray1D, std::nullopt);
1043 render_target = Handle(VideoCommon::ImageViewType::e1DArray); 1021 render_target = Handle(TextureType::ColorArray1D);
1044 break; 1022 break;
1045 case VideoCommon::ImageViewType::e2D: 1023 case VideoCommon::ImageViewType::e2D:
1046 case VideoCommon::ImageViewType::e2DArray: 1024 case VideoCommon::ImageViewType::e2DArray:
1047 create(VideoCommon::ImageViewType::e2D, 1); 1025 create(TextureType::Color2D, 1);
1048 create(VideoCommon::ImageViewType::e2DArray, std::nullopt); 1026 create(TextureType::ColorArray2D, std::nullopt);
1049 render_target = Handle(VideoCommon::ImageViewType::e2DArray); 1027 render_target = Handle(Shader::TextureType::ColorArray2D);
1050 break; 1028 break;
1051 case VideoCommon::ImageViewType::e3D: 1029 case VideoCommon::ImageViewType::e3D:
1052 create(VideoCommon::ImageViewType::e3D, std::nullopt); 1030 create(TextureType::Color3D, std::nullopt);
1053 render_target = Handle(VideoCommon::ImageViewType::e3D); 1031 render_target = Handle(Shader::TextureType::Color3D);
1054 break; 1032 break;
1055 case VideoCommon::ImageViewType::Cube: 1033 case VideoCommon::ImageViewType::Cube:
1056 case VideoCommon::ImageViewType::CubeArray: 1034 case VideoCommon::ImageViewType::CubeArray:
1057 create(VideoCommon::ImageViewType::Cube, 6); 1035 create(TextureType::ColorCube, 6);
1058 create(VideoCommon::ImageViewType::CubeArray, std::nullopt); 1036 create(TextureType::ColorArrayCube, std::nullopt);
1059 break; 1037 break;
1060 case VideoCommon::ImageViewType::Rect: 1038 case VideoCommon::ImageViewType::Rect:
1061 UNIMPLEMENTED(); 1039 UNIMPLEMENTED();
1062 break; 1040 break;
1063 case VideoCommon::ImageViewType::Buffer: 1041 case VideoCommon::ImageViewType::Buffer:
1064 buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ 1042 UNREACHABLE();
1065 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1066 .pNext = nullptr,
1067 .flags = 0,
1068 .buffer = image.Buffer(),
1069 .format = format_info.format,
1070 .offset = 0, // TODO: Redesign buffer cache to support this
1071 .range = image.guest_size_bytes,
1072 });
1073 break; 1043 break;
1074 } 1044 }
1075} 1045}
1076 1046
1047ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
1048 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
1049 : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
1050 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
1051
1077ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) 1052ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
1078 : VideoCommon::ImageViewBase{params} {} 1053 : VideoCommon::ImageViewBase{params} {}
1079 1054
@@ -1081,7 +1056,8 @@ VkImageView ImageView::DepthView() {
1081 if (depth_view) { 1056 if (depth_view) {
1082 return *depth_view; 1057 return *depth_view;
1083 } 1058 }
1084 depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); 1059 const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
1060 depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT);
1085 return *depth_view; 1061 return *depth_view;
1086} 1062}
1087 1063
@@ -1089,18 +1065,38 @@ VkImageView ImageView::StencilView() {
1089 if (stencil_view) { 1065 if (stencil_view) {
1090 return *stencil_view; 1066 return *stencil_view;
1091 } 1067 }
1092 stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); 1068 const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
1069 stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT);
1093 return *stencil_view; 1070 return *stencil_view;
1094} 1071}
1095 1072
1096vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { 1073VkImageView ImageView::StorageView(Shader::TextureType texture_type,
1074 Shader::ImageFormat image_format) {
1075 if (image_format == Shader::ImageFormat::Typeless) {
1076 return Handle(texture_type);
1077 }
1078 const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
1079 image_format == Shader::ImageFormat::R16_SINT};
1080 if (!storage_views) {
1081 storage_views = std::make_unique<StorageViews>();
1082 }
1083 auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds};
1084 auto& view{views[static_cast<size_t>(texture_type)]};
1085 if (view) {
1086 return *view;
1087 }
1088 view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
1089 return *view;
1090}
1091
1092vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {
1097 return device->GetLogical().CreateImageView({ 1093 return device->GetLogical().CreateImageView({
1098 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 1094 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1099 .pNext = nullptr, 1095 .pNext = nullptr,
1100 .flags = 0, 1096 .flags = 0,
1101 .image = image_handle, 1097 .image = image_handle,
1102 .viewType = ImageViewType(type), 1098 .viewType = ImageViewType(type),
1103 .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, 1099 .format = vk_format,
1104 .components{ 1100 .components{
1105 .r = VK_COMPONENT_SWIZZLE_IDENTITY, 1101 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
1106 .g = VK_COMPONENT_SWIZZLE_IDENTITY, 1102 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -1164,7 +1160,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
1164 1160
1165Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, 1161Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
1166 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { 1162 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
1167 std::vector<VkAttachmentDescription> descriptions;
1168 std::vector<VkImageView> attachments; 1163 std::vector<VkImageView> attachments;
1169 RenderPassKey renderpass_key{}; 1164 RenderPassKey renderpass_key{};
1170 s32 num_layers = 1; 1165 s32 num_layers = 1;
@@ -1175,7 +1170,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1175 renderpass_key.color_formats[index] = PixelFormat::Invalid; 1170 renderpass_key.color_formats[index] = PixelFormat::Invalid;
1176 continue; 1171 continue;
1177 } 1172 }
1178 descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
1179 attachments.push_back(color_buffer->RenderTarget()); 1173 attachments.push_back(color_buffer->RenderTarget());
1180 renderpass_key.color_formats[index] = color_buffer->format; 1174 renderpass_key.color_formats[index] = color_buffer->format;
1181 num_layers = std::max(num_layers, color_buffer->range.extent.layers); 1175 num_layers = std::max(num_layers, color_buffer->range.extent.layers);
@@ -1185,10 +1179,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1185 ++num_images; 1179 ++num_images;
1186 } 1180 }
1187 const size_t num_colors = attachments.size(); 1181 const size_t num_colors = attachments.size();
1188 const VkAttachmentReference* depth_attachment =
1189 depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
1190 if (depth_buffer) { 1182 if (depth_buffer) {
1191 descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
1192 attachments.push_back(depth_buffer->RenderTarget()); 1183 attachments.push_back(depth_buffer->RenderTarget());
1193 renderpass_key.depth_format = depth_buffer->format; 1184 renderpass_key.depth_format = depth_buffer->format;
1194 num_layers = std::max(num_layers, depth_buffer->range.extent.layers); 1185 num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
@@ -1201,40 +1192,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
1201 } 1192 }
1202 renderpass_key.samples = samples; 1193 renderpass_key.samples = samples;
1203 1194
1204 const auto& device = runtime.device.GetLogical(); 1195 renderpass = runtime.render_pass_cache.Get(renderpass_key);
1205 const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); 1196
1206 if (is_new) {
1207 const VkSubpassDescription subpass{
1208 .flags = 0,
1209 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
1210 .inputAttachmentCount = 0,
1211 .pInputAttachments = nullptr,
1212 .colorAttachmentCount = static_cast<u32>(num_colors),
1213 .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
1214 .pResolveAttachments = nullptr,
1215 .pDepthStencilAttachment = depth_attachment,
1216 .preserveAttachmentCount = 0,
1217 .pPreserveAttachments = nullptr,
1218 };
1219 cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
1220 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
1221 .pNext = nullptr,
1222 .flags = 0,
1223 .attachmentCount = static_cast<u32>(descriptions.size()),
1224 .pAttachments = descriptions.data(),
1225 .subpassCount = 1,
1226 .pSubpasses = &subpass,
1227 .dependencyCount = 0,
1228 .pDependencies = nullptr,
1229 });
1230 }
1231 renderpass = *cache_pair->second;
1232 render_area = VkExtent2D{ 1197 render_area = VkExtent2D{
1233 .width = key.size.width, 1198 .width = key.size.width,
1234 .height = key.size.height, 1199 .height = key.size.height,
1235 }; 1200 };
1236 num_color_buffers = static_cast<u32>(num_colors); 1201 num_color_buffers = static_cast<u32>(num_colors);
1237 framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ 1202 framebuffer = runtime.device.GetLogical().CreateFramebuffer({
1238 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, 1203 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
1239 .pNext = nullptr, 1204 .pNext = nullptr,
1240 .flags = 0, 1205 .flags = 0,
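
The Framebuffer constructor above no longer builds its own VkRenderPass: the per-framebuffer try_emplace/CreateRenderPass block is replaced by a single runtime.render_pass_cache.Get(renderpass_key) lookup. A minimal sketch of that caching pattern follows, keyed the same way the removed RenderPassKey was (color formats, depth format, sample count); the MakeRenderPass helper and the constructor shape are illustrative assumptions, not the RenderPassCache added elsewhere in this merge.

// Sketch only; MakeRenderPass is a hypothetical helper standing in for the
// VkRenderPassCreateInfo setup the removed code performed per framebuffer.
class RenderPassCache {
public:
    explicit RenderPassCache(const Device& device_) : device{&device_} {}

    VkRenderPass Get(const RenderPassKey& key) {
        const auto [pair, is_new] = cache.try_emplace(key);
        if (is_new) {
            // Created once per unique (color formats, depth format, samples)
            // combination and reused by every framebuffer that matches it.
            pair->second = MakeRenderPass(*device, key);
        }
        return *pair->second;
    }

private:
    const Device* device;
    std::unordered_map<RenderPassKey, vk::RenderPass> cache;
};
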
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 172bcdf98..0b73d55f8 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
7#include <compare> 7#include <compare>
8#include <span> 8#include <span>
9 9
10#include "shader_recompiler/shader_info.h"
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 11#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
11#include "video_core/texture_cache/texture_cache.h" 12#include "video_core/texture_cache/texture_cache.h"
12#include "video_core/vulkan_common/vulkan_memory_allocator.h" 13#include "video_core/vulkan_common/vulkan_memory_allocator.h"
@@ -26,35 +27,10 @@ class Device;
26class Image; 27class Image;
27class ImageView; 28class ImageView;
28class Framebuffer; 29class Framebuffer;
30class RenderPassCache;
29class StagingBufferPool; 31class StagingBufferPool;
30class VKScheduler; 32class VKScheduler;
31 33
32struct RenderPassKey {
33 constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
34
35 std::array<PixelFormat, NUM_RT> color_formats;
36 PixelFormat depth_format;
37 VkSampleCountFlagBits samples;
38};
39
40} // namespace Vulkan
41
42namespace std {
43template <>
44struct hash<Vulkan::RenderPassKey> {
45 [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
46 size_t value = static_cast<size_t>(key.depth_format) << 48;
47 value ^= static_cast<size_t>(key.samples) << 52;
48 for (size_t i = 0; i < key.color_formats.size(); ++i) {
49 value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
50 }
51 return value;
52 }
53};
54} // namespace std
55
56namespace Vulkan {
57
58struct TextureCacheRuntime { 34struct TextureCacheRuntime {
59 const Device& device; 35 const Device& device;
60 VKScheduler& scheduler; 36 VKScheduler& scheduler;
@@ -62,13 +38,13 @@ struct TextureCacheRuntime {
62 StagingBufferPool& staging_buffer_pool; 38 StagingBufferPool& staging_buffer_pool;
63 BlitImageHelper& blit_image_helper; 39 BlitImageHelper& blit_image_helper;
64 ASTCDecoderPass& astc_decoder_pass; 40 ASTCDecoderPass& astc_decoder_pass;
65 std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; 41 RenderPassCache& render_pass_cache;
66 42
67 void Finish(); 43 void Finish();
68 44
69 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 45 StagingBufferRef UploadStagingBuffer(size_t size);
70 46
71 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); 47 StagingBufferRef DownloadStagingBuffer(size_t size);
72 48
73 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, 49 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
74 const Region2D& dst_region, const Region2D& src_region, 50 const Region2D& dst_region, const Region2D& src_region,
@@ -79,7 +55,7 @@ struct TextureCacheRuntime {
79 55
80 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); 56 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
81 57
82 [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { 58 bool CanAccelerateImageUpload(Image&) const noexcept {
83 return false; 59 return false;
84 } 60 }
85 61
@@ -117,8 +93,6 @@ public:
117 void UploadMemory(const StagingBufferRef& map, 93 void UploadMemory(const StagingBufferRef& map,
118 std::span<const VideoCommon::BufferImageCopy> copies); 94 std::span<const VideoCommon::BufferImageCopy> copies);
119 95
120 void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
121
122 void DownloadMemory(const StagingBufferRef& map, 96 void DownloadMemory(const StagingBufferRef& map,
123 std::span<const VideoCommon::BufferImageCopy> copies); 97 std::span<const VideoCommon::BufferImageCopy> copies);
124 98
@@ -126,10 +100,6 @@ public:
126 return *image; 100 return *image;
127 } 101 }
128 102
129 [[nodiscard]] VkBuffer Buffer() const noexcept {
130 return *buffer;
131 }
132
133 [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { 103 [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
134 return aspect_mask; 104 return aspect_mask;
135 } 105 }
@@ -146,7 +116,6 @@ public:
146private: 116private:
147 VKScheduler* scheduler; 117 VKScheduler* scheduler;
148 vk::Image image; 118 vk::Image image;
149 vk::Buffer buffer;
150 MemoryCommit commit; 119 MemoryCommit commit;
151 vk::ImageView image_view; 120 vk::ImageView image_view;
152 std::vector<vk::ImageView> storage_image_views; 121 std::vector<vk::ImageView> storage_image_views;
@@ -157,18 +126,19 @@ private:
157class ImageView : public VideoCommon::ImageViewBase { 126class ImageView : public VideoCommon::ImageViewBase {
158public: 127public:
159 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); 128 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
129 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
130 const VideoCommon::ImageViewInfo&, GPUVAddr);
160 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); 131 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
161 132
162 [[nodiscard]] VkImageView DepthView(); 133 [[nodiscard]] VkImageView DepthView();
163 134
164 [[nodiscard]] VkImageView StencilView(); 135 [[nodiscard]] VkImageView StencilView();
165 136
166 [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { 137 [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
167 return *image_views[static_cast<size_t>(query_type)]; 138 Shader::ImageFormat image_format);
168 }
169 139
170 [[nodiscard]] VkBufferView BufferView() const noexcept { 140 [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
171 return *buffer_view; 141 return *image_views[static_cast<size_t>(texture_type)];
172 } 142 }
173 143
174 [[nodiscard]] VkImage ImageHandle() const noexcept { 144 [[nodiscard]] VkImage ImageHandle() const noexcept {
@@ -179,26 +149,36 @@ public:
179 return render_target; 149 return render_target;
180 } 150 }
181 151
182 [[nodiscard]] PixelFormat ImageFormat() const noexcept {
183 return image_format;
184 }
185
186 [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { 152 [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
187 return samples; 153 return samples;
188 } 154 }
189 155
156 [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
157 return gpu_addr;
158 }
159
160 [[nodiscard]] u32 BufferSize() const noexcept {
161 return buffer_size;
162 }
163
190private: 164private:
191 [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); 165 struct StorageViews {
166 std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
167 std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
168 };
169
170 [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
192 171
193 const Device* device = nullptr; 172 const Device* device = nullptr;
194 std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; 173 std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
174 std::unique_ptr<StorageViews> storage_views;
195 vk::ImageView depth_view; 175 vk::ImageView depth_view;
196 vk::ImageView stencil_view; 176 vk::ImageView stencil_view;
197 vk::BufferView buffer_view;
198 VkImage image_handle = VK_NULL_HANDLE; 177 VkImage image_handle = VK_NULL_HANDLE;
199 VkImageView render_target = VK_NULL_HANDLE; 178 VkImageView render_target = VK_NULL_HANDLE;
200 PixelFormat image_format = PixelFormat::Invalid;
201 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; 179 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
180 GPUVAddr gpu_addr = 0;
181 u32 buffer_size = 0;
202}; 182};
203 183
204class ImageAlloc : public VideoCommon::ImageAllocBase {}; 184class ImageAlloc : public VideoCommon::ImageAllocBase {};
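
With the header above, ImageView no longer keeps a PixelFormat copy or a VkBufferView; its views are indexed by Shader::TextureType through Handle(), and reinterpreted integer views for storage images are created lazily through StorageView(). A small usage sketch under those signatures; the helper names are illustrative and the chosen texture type and format are arbitrary.

// Illustrative only: picking handles with the reworked ImageView API.
VkImageView SampledHandle(Vulkan::ImageView& view) {
    // One view per Shader::TextureType is created by the constructor.
    return view.Handle(Shader::TextureType::ColorArray2D);
}

VkImageView StorageHandle(Vulkan::ImageView& view) {
    // Typeless reuses the per-type handle; other formats are created lazily
    // and cached in the signed/unsigned arrays of StorageViews.
    return view.StorageView(Shader::TextureType::Color2D, Shader::ImageFormat::R32_UINT);
}
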
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index dc45fdcb1..0df3a7fe9 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -15,7 +15,9 @@
15namespace Vulkan { 15namespace Vulkan {
16 16
17VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) 17VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_)
18 : device{device_}, scheduler{scheduler_} {} 18 : device{device_}, scheduler{scheduler_} {
19 payload_cursor = payload.data();
20}
19 21
20VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; 22VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
21 23
@@ -36,13 +38,4 @@ void VKUpdateDescriptorQueue::Acquire() {
36 upload_start = payload_cursor; 38 upload_start = payload_cursor;
37} 39}
38 40
39void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
40 VkDescriptorSet set) {
41 const void* const data = upload_start;
42 const vk::Device* const logical = &device.GetLogical();
43 scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
44 logical->UpdateDescriptorSet(set, update_template, data);
45 });
46}
47
48} // namespace Vulkan 41} // namespace Vulkan
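
The removed Send() recorded the descriptor update on the scheduler; after this change the queue only accumulates entries and exposes the staged data through the UpdateData() accessor added to the header below, so the caller applies the template itself. A minimal sketch of that flow, assuming the caller owns the logical device wrapper, the descriptor set, and the update template:

// Sketch only, not the caller code introduced by this merge.
update_queue.Acquire();
update_queue.AddSampledImage(image_view, sampler); // fills the payload
const void* const data = update_queue.UpdateData();
logical.UpdateDescriptorSet(set, update_template, data);
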
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index d35e77c44..d7de4c490 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -39,7 +39,9 @@ public:
39 39
40 void Acquire(); 40 void Acquire();
41 41
42 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); 42 const DescriptorUpdateEntry* UpdateData() const noexcept {
43 return upload_start;
44 }
43 45
44 void AddSampledImage(VkImageView image_view, VkSampler sampler) { 46 void AddSampledImage(VkImageView image_view, VkSampler sampler) {
45 *(payload_cursor++) = VkDescriptorImageInfo{ 47 *(payload_cursor++) = VkDescriptorImageInfo{
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
deleted file mode 100644
index db11144c7..000000000
--- a/src/video_core/shader/ast.cpp
+++ /dev/null
@@ -1,752 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6#include <string_view>
7
8#include <fmt/format.h>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "video_core/shader/ast.h"
13#include "video_core/shader/expr.h"
14
15namespace VideoCommon::Shader {
16
17ASTZipper::ASTZipper() = default;
18
19void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
20 ASSERT(new_first->manager == nullptr);
21 first = new_first;
22 last = new_first;
23
24 ASTNode current = first;
25 while (current) {
26 current->manager = this;
27 current->parent = parent;
28 last = current;
29 current = current->next;
30 }
31}
32
33void ASTZipper::PushBack(const ASTNode new_node) {
34 ASSERT(new_node->manager == nullptr);
35 new_node->previous = last;
36 if (last) {
37 last->next = new_node;
38 }
39 new_node->next.reset();
40 last = new_node;
41 if (!first) {
42 first = new_node;
43 }
44 new_node->manager = this;
45}
46
47void ASTZipper::PushFront(const ASTNode new_node) {
48 ASSERT(new_node->manager == nullptr);
49 new_node->previous.reset();
50 new_node->next = first;
51 if (first) {
52 first->previous = new_node;
53 }
54 if (last == first) {
55 last = new_node;
56 }
57 first = new_node;
58 new_node->manager = this;
59}
60
61void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
62 ASSERT(new_node->manager == nullptr);
63 if (!at_node) {
64 PushFront(new_node);
65 return;
66 }
67 const ASTNode next = at_node->next;
68 if (next) {
69 next->previous = new_node;
70 }
71 new_node->previous = at_node;
72 if (at_node == last) {
73 last = new_node;
74 }
75 new_node->next = next;
76 at_node->next = new_node;
77 new_node->manager = this;
78}
79
80void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
81 ASSERT(new_node->manager == nullptr);
82 if (!at_node) {
83 PushBack(new_node);
84 return;
85 }
86 const ASTNode previous = at_node->previous;
87 if (previous) {
88 previous->next = new_node;
89 }
90 new_node->next = at_node;
91 if (at_node == first) {
92 first = new_node;
93 }
94 new_node->previous = previous;
95 at_node->previous = new_node;
96 new_node->manager = this;
97}
98
99void ASTZipper::DetachTail(ASTNode node) {
100 ASSERT(node->manager == this);
101 if (node == first) {
102 first.reset();
103 last.reset();
104 return;
105 }
106
107 last = node->previous;
108 last->next.reset();
109 node->previous.reset();
110
111 ASTNode current = std::move(node);
112 while (current) {
113 current->manager = nullptr;
114 current->parent.reset();
115 current = current->next;
116 }
117}
118
119void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
120 ASSERT(start->manager == this && end->manager == this);
121 if (start == end) {
122 DetachSingle(start);
123 return;
124 }
125 const ASTNode prev = start->previous;
126 const ASTNode post = end->next;
127 if (!prev) {
128 first = post;
129 } else {
130 prev->next = post;
131 }
132 if (!post) {
133 last = prev;
134 } else {
135 post->previous = prev;
136 }
137 start->previous.reset();
138 end->next.reset();
139 ASTNode current = start;
140 bool found = false;
141 while (current) {
142 current->manager = nullptr;
143 current->parent.reset();
144 found |= current == end;
145 current = current->next;
146 }
147 ASSERT(found);
148}
149
150void ASTZipper::DetachSingle(const ASTNode node) {
151 ASSERT(node->manager == this);
152 const ASTNode prev = node->previous;
153 const ASTNode post = node->next;
154 node->previous.reset();
155 node->next.reset();
156 if (!prev) {
157 first = post;
158 } else {
159 prev->next = post;
160 }
161 if (!post) {
162 last = prev;
163 } else {
164 post->previous = prev;
165 }
166
167 node->manager = nullptr;
168 node->parent.reset();
169}
170
171void ASTZipper::Remove(const ASTNode node) {
172 ASSERT(node->manager == this);
173 const ASTNode next = node->next;
174 const ASTNode previous = node->previous;
175 if (previous) {
176 previous->next = next;
177 }
178 if (next) {
179 next->previous = previous;
180 }
181 node->parent.reset();
182 node->manager = nullptr;
183 if (node == last) {
184 last = previous;
185 }
186 if (node == first) {
187 first = next;
188 }
189}
190
191class ExprPrinter final {
192public:
193 void operator()(const ExprAnd& expr) {
194 inner += "( ";
195 std::visit(*this, *expr.operand1);
196 inner += " && ";
197 std::visit(*this, *expr.operand2);
198 inner += ')';
199 }
200
201 void operator()(const ExprOr& expr) {
202 inner += "( ";
203 std::visit(*this, *expr.operand1);
204 inner += " || ";
205 std::visit(*this, *expr.operand2);
206 inner += ')';
207 }
208
209 void operator()(const ExprNot& expr) {
210 inner += "!";
211 std::visit(*this, *expr.operand1);
212 }
213
214 void operator()(const ExprPredicate& expr) {
215 inner += fmt::format("P{}", expr.predicate);
216 }
217
218 void operator()(const ExprCondCode& expr) {
219 inner += fmt::format("CC{}", expr.cc);
220 }
221
222 void operator()(const ExprVar& expr) {
223 inner += fmt::format("V{}", expr.var_index);
224 }
225
226 void operator()(const ExprBoolean& expr) {
227 inner += expr.value ? "true" : "false";
228 }
229
230 void operator()(const ExprGprEqual& expr) {
231 inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value);
232 }
233
234 const std::string& GetResult() const {
235 return inner;
236 }
237
238private:
239 std::string inner;
240};
241
242class ASTPrinter {
243public:
244 void operator()(const ASTProgram& ast) {
245 scope++;
246 inner += "program {\n";
247 ASTNode current = ast.nodes.GetFirst();
248 while (current) {
249 Visit(current);
250 current = current->GetNext();
251 }
252 inner += "}\n";
253 scope--;
254 }
255
256 void operator()(const ASTIfThen& ast) {
257 ExprPrinter expr_parser{};
258 std::visit(expr_parser, *ast.condition);
259 inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
260 scope++;
261 ASTNode current = ast.nodes.GetFirst();
262 while (current) {
263 Visit(current);
264 current = current->GetNext();
265 }
266 scope--;
267 inner += fmt::format("{}}}\n", Indent());
268 }
269
270 void operator()(const ASTIfElse& ast) {
271 inner += Indent();
272 inner += "else {\n";
273
274 scope++;
275 ASTNode current = ast.nodes.GetFirst();
276 while (current) {
277 Visit(current);
278 current = current->GetNext();
279 }
280 scope--;
281
282 inner += Indent();
283 inner += "}\n";
284 }
285
286 void operator()(const ASTBlockEncoded& ast) {
287 inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
288 }
289
290 void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
291 inner += Indent();
292 inner += "Block;\n";
293 }
294
295 void operator()(const ASTVarSet& ast) {
296 ExprPrinter expr_parser{};
297 std::visit(expr_parser, *ast.condition);
298 inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
299 }
300
301 void operator()(const ASTLabel& ast) {
302 inner += fmt::format("Label_{}:\n", ast.index);
303 }
304
305 void operator()(const ASTGoto& ast) {
306 ExprPrinter expr_parser{};
307 std::visit(expr_parser, *ast.condition);
308 inner +=
309 fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
310 }
311
312 void operator()(const ASTDoWhile& ast) {
313 ExprPrinter expr_parser{};
314 std::visit(expr_parser, *ast.condition);
315 inner += fmt::format("{}do {{\n", Indent());
316 scope++;
317 ASTNode current = ast.nodes.GetFirst();
318 while (current) {
319 Visit(current);
320 current = current->GetNext();
321 }
322 scope--;
323 inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
324 }
325
326 void operator()(const ASTReturn& ast) {
327 ExprPrinter expr_parser{};
328 std::visit(expr_parser, *ast.condition);
329 inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
330 ast.kills ? "discard" : "exit");
331 }
332
333 void operator()(const ASTBreak& ast) {
334 ExprPrinter expr_parser{};
335 std::visit(expr_parser, *ast.condition);
336 inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
337 }
338
339 void Visit(const ASTNode& node) {
340 std::visit(*this, *node->GetInnerData());
341 }
342
343 const std::string& GetResult() const {
344 return inner;
345 }
346
347private:
348 std::string_view Indent() {
349 if (space_segment_scope == scope) {
350 return space_segment;
351 }
352
353 // Ensure that we don't exceed our view.
354 ASSERT(scope * 2 < spaces.size());
355
356 space_segment = spaces.substr(0, scope * 2);
357 space_segment_scope = scope;
358 return space_segment;
359 }
360
361 std::string inner{};
362 std::string_view space_segment;
363
364 u32 scope{};
365 u32 space_segment_scope{};
366
367 static constexpr std::string_view spaces{" "};
368};
369
370std::string ASTManager::Print() const {
371 ASTPrinter printer{};
372 printer.Visit(main_node);
373 return printer.GetResult();
374}
375
376ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
377 : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {}
378
379ASTManager::~ASTManager() {
380 Clear();
381}
382
383void ASTManager::Init() {
384 main_node = ASTBase::Make<ASTProgram>(ASTNode{});
385 program = std::get_if<ASTProgram>(main_node->GetInnerData());
386 false_condition = MakeExpr<ExprBoolean>(false);
387}
388
389void ASTManager::DeclareLabel(u32 address) {
390 const auto pair = labels_map.emplace(address, labels_count);
391 if (pair.second) {
392 labels_count++;
393 labels.resize(labels_count);
394 }
395}
396
397void ASTManager::InsertLabel(u32 address) {
398 const u32 index = labels_map[address];
399 const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index);
400 labels[index] = label;
401 program->nodes.PushBack(label);
402}
403
404void ASTManager::InsertGoto(Expr condition, u32 address) {
405 const u32 index = labels_map[address];
406 const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index);
407 gotos.push_back(goto_node);
408 program->nodes.PushBack(goto_node);
409}
410
411void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
412 ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address);
413 program->nodes.PushBack(std::move(block));
414}
415
416void ASTManager::InsertReturn(Expr condition, bool kills) {
417 ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills);
418 program->nodes.PushBack(std::move(node));
419}
420
421// The decompile algorithm is based on
422// "Taming control flow: A structured approach to eliminating goto statements"
423// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be
424// on the same structured level as the label which they jump to. This is done,
425// through outward/inward movements and lifting. Once they are at the same
426// level, you can enclose them in an "if" structure or a "do-while" structure.
427void ASTManager::Decompile() {
428 auto it = gotos.begin();
429 while (it != gotos.end()) {
430 const ASTNode goto_node = *it;
431 const auto label_index = goto_node->GetGotoLabel();
432 if (!label_index) {
433 return;
434 }
435 const ASTNode label = labels[*label_index];
436 if (!full_decompile) {
437 // We only decompile backward jumps
438 if (!IsBackwardsJump(goto_node, label)) {
439 it++;
440 continue;
441 }
442 }
443 if (IndirectlyRelated(goto_node, label)) {
444 while (!DirectlyRelated(goto_node, label)) {
445 MoveOutward(goto_node);
446 }
447 }
448 if (DirectlyRelated(goto_node, label)) {
449 u32 goto_level = goto_node->GetLevel();
450 const u32 label_level = label->GetLevel();
451 while (label_level < goto_level) {
452 MoveOutward(goto_node);
453 goto_level--;
454 }
455 // TODO(Blinkhawk): Implement Lifting and Inward Movements
456 }
457 if (label->GetParent() == goto_node->GetParent()) {
458 bool is_loop = false;
459 ASTNode current = goto_node->GetPrevious();
460 while (current) {
461 if (current == label) {
462 is_loop = true;
463 break;
464 }
465 current = current->GetPrevious();
466 }
467
468 if (is_loop) {
469 EncloseDoWhile(goto_node, label);
470 } else {
471 EncloseIfThen(goto_node, label);
472 }
473 it = gotos.erase(it);
474 continue;
475 }
476 it++;
477 }
478 if (full_decompile) {
479 for (const ASTNode& label : labels) {
480 auto& manager = label->GetManager();
481 manager.Remove(label);
482 }
483 labels.clear();
484 } else {
485 auto label_it = labels.begin();
486 while (label_it != labels.end()) {
487 bool can_remove = true;
488 ASTNode label = *label_it;
489 for (const ASTNode& goto_node : gotos) {
490 const auto label_index = goto_node->GetGotoLabel();
491 if (!label_index) {
492 return;
493 }
494 ASTNode& glabel = labels[*label_index];
495 if (glabel == label) {
496 can_remove = false;
497 break;
498 }
499 }
500 if (can_remove) {
501 label->MarkLabelUnused();
502 }
503 }
504 }
505}
506
507bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
508 u32 goto_level = goto_node->GetLevel();
509 u32 label_level = label_node->GetLevel();
510 while (goto_level > label_level) {
511 goto_level--;
512 goto_node = goto_node->GetParent();
513 }
514 while (label_level > goto_level) {
515 label_level--;
516 label_node = label_node->GetParent();
517 }
518 while (goto_node->GetParent() != label_node->GetParent()) {
519 goto_node = goto_node->GetParent();
520 label_node = label_node->GetParent();
521 }
522 ASTNode current = goto_node->GetPrevious();
523 while (current) {
524 if (current == label_node) {
525 return true;
526 }
527 current = current->GetPrevious();
528 }
529 return false;
530}
531
532bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
533 return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second));
534}
535
536bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
537 if (first->GetParent() == second->GetParent()) {
538 return false;
539 }
540 const u32 first_level = first->GetLevel();
541 const u32 second_level = second->GetLevel();
542 u32 min_level;
543 u32 max_level;
544 ASTNode max;
545 ASTNode min;
546 if (first_level > second_level) {
547 min_level = second_level;
548 min = second;
549 max_level = first_level;
550 max = first;
551 } else {
552 min_level = first_level;
553 min = first;
554 max_level = second_level;
555 max = second;
556 }
557
558 while (max_level > min_level) {
559 max_level--;
560 max = max->GetParent();
561 }
562
563 return min->GetParent() == max->GetParent();
564}
565
566void ASTManager::ShowCurrentState(std::string_view state) const {
567 LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
568 SanityCheck();
569}
570
571void ASTManager::SanityCheck() const {
572 for (const auto& label : labels) {
573 if (!label->GetParent()) {
574 LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
575 }
576 }
577}
578
579void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
580 ASTZipper& zipper = goto_node->GetManager();
581 const ASTNode loop_start = label->GetNext();
582 if (loop_start == goto_node) {
583 zipper.Remove(goto_node);
584 return;
585 }
586 const ASTNode parent = label->GetParent();
587 const Expr condition = goto_node->GetGotoCondition();
588 zipper.DetachSegment(loop_start, goto_node);
589 const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition);
590 ASTZipper* sub_zipper = do_while_node->GetSubNodes();
591 sub_zipper->Init(loop_start, do_while_node);
592 zipper.InsertAfter(do_while_node, label);
593 sub_zipper->Remove(goto_node);
594}
595
596void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
597 ASTZipper& zipper = goto_node->GetManager();
598 const ASTNode if_end = label->GetPrevious();
599 if (if_end == goto_node) {
600 zipper.Remove(goto_node);
601 return;
602 }
603 const ASTNode prev = goto_node->GetPrevious();
604 const Expr condition = goto_node->GetGotoCondition();
605 bool do_else = false;
606 if (!disable_else_derivation && prev->IsIfThen()) {
607 const Expr if_condition = prev->GetIfCondition();
608 do_else = ExprAreEqual(if_condition, condition);
609 }
610 const ASTNode parent = label->GetParent();
611 zipper.DetachSegment(goto_node, if_end);
612 ASTNode if_node;
613 if (do_else) {
614 if_node = ASTBase::Make<ASTIfElse>(parent);
615 } else {
616 Expr neg_condition = MakeExprNot(condition);
617 if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
618 }
619 ASTZipper* sub_zipper = if_node->GetSubNodes();
620 sub_zipper->Init(goto_node, if_node);
621 zipper.InsertAfter(if_node, prev);
622 sub_zipper->Remove(goto_node);
623}
624
625void ASTManager::MoveOutward(ASTNode goto_node) {
626 ASTZipper& zipper = goto_node->GetManager();
627 const ASTNode parent = goto_node->GetParent();
628 ASTZipper& zipper2 = parent->GetManager();
629 const ASTNode grandpa = parent->GetParent();
630 const bool is_loop = parent->IsLoop();
631 const bool is_else = parent->IsIfElse();
632 const bool is_if = parent->IsIfThen();
633
634 const ASTNode prev = goto_node->GetPrevious();
635 const ASTNode post = goto_node->GetNext();
636
637 const Expr condition = goto_node->GetGotoCondition();
638 zipper.DetachSingle(goto_node);
639 if (is_loop) {
640 const u32 var_index = NewVariable();
641 const Expr var_condition = MakeExpr<ExprVar>(var_index);
642 const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
643 const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
644 zipper2.InsertBefore(var_node_init, parent);
645 zipper.InsertAfter(var_node, prev);
646 goto_node->SetGotoCondition(var_condition);
647 const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
648 zipper.InsertAfter(break_node, var_node);
649 } else if (is_if || is_else) {
650 const u32 var_index = NewVariable();
651 const Expr var_condition = MakeExpr<ExprVar>(var_index);
652 const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
653 const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
654 if (is_if) {
655 zipper2.InsertBefore(var_node_init, parent);
656 } else {
657 zipper2.InsertBefore(var_node_init, parent->GetPrevious());
658 }
659 zipper.InsertAfter(var_node, prev);
660 goto_node->SetGotoCondition(var_condition);
661 if (post) {
662 zipper.DetachTail(post);
663 const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
664 ASTZipper* sub_zipper = if_node->GetSubNodes();
665 sub_zipper->Init(post, if_node);
666 zipper.InsertAfter(if_node, var_node);
667 }
668 } else {
669 UNREACHABLE();
670 }
671 const ASTNode next = parent->GetNext();
672 if (is_if && next && next->IsIfElse()) {
673 zipper2.InsertAfter(goto_node, next);
674 goto_node->SetParent(grandpa);
675 return;
676 }
677 zipper2.InsertAfter(goto_node, parent);
678 goto_node->SetParent(grandpa);
679}
680
681class ASTClearer {
682public:
683 ASTClearer() = default;
684
685 void operator()(const ASTProgram& ast) {
686 ASTNode current = ast.nodes.GetFirst();
687 while (current) {
688 Visit(current);
689 current = current->GetNext();
690 }
691 }
692
693 void operator()(const ASTIfThen& ast) {
694 ASTNode current = ast.nodes.GetFirst();
695 while (current) {
696 Visit(current);
697 current = current->GetNext();
698 }
699 }
700
701 void operator()(const ASTIfElse& ast) {
702 ASTNode current = ast.nodes.GetFirst();
703 while (current) {
704 Visit(current);
705 current = current->GetNext();
706 }
707 }
708
709 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
710
711 void operator()(ASTBlockDecoded& ast) {
712 ast.nodes.clear();
713 }
714
715 void operator()([[maybe_unused]] const ASTVarSet& ast) {}
716
717 void operator()([[maybe_unused]] const ASTLabel& ast) {}
718
719 void operator()([[maybe_unused]] const ASTGoto& ast) {}
720
721 void operator()(const ASTDoWhile& ast) {
722 ASTNode current = ast.nodes.GetFirst();
723 while (current) {
724 Visit(current);
725 current = current->GetNext();
726 }
727 }
728
729 void operator()([[maybe_unused]] const ASTReturn& ast) {}
730
731 void operator()([[maybe_unused]] const ASTBreak& ast) {}
732
733 void Visit(const ASTNode& node) {
734 std::visit(*this, *node->GetInnerData());
735 node->Clear();
736 }
737};
738
739void ASTManager::Clear() {
740 if (!main_node) {
741 return;
742 }
743 ASTClearer clearer{};
744 clearer.Visit(main_node);
745 main_node.reset();
746 program = nullptr;
747 labels_map.clear();
748 labels.clear();
749 gotos.clear();
750}
751
752} // namespace VideoCommon::Shader
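
The deleted Decompile() pass above follows the Erosa and Hendren goto-taming approach: a goto is first moved outward until it sits at the same structural level as its label, then the pair is enclosed in a do-while (backward jump) or an if-then guarded by the negated condition (forward jump). A conceptual before/after, written as comments because it illustrates the transformation rather than the removed ASTManager code:

// Forward, same-level goto becomes an if-then guarding the skipped region:
//
//   if (p) goto L;            if (!p) {
//   A();                =>        A();
//   L:                        }
//
// Backward goto to an earlier label becomes a do-while loop:
//
//   L:
//   B();                 =>   do { B(); } while (p);
//   if (p) goto L;
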
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
deleted file mode 100644
index dc49b369e..000000000
--- a/src/video_core/shader/ast.h
+++ /dev/null
@@ -1,398 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8#include <list>
9#include <memory>
10#include <optional>
11#include <string>
12#include <unordered_map>
13#include <vector>
14
15#include "video_core/shader/expr.h"
16#include "video_core/shader/node.h"
17
18namespace VideoCommon::Shader {
19
20class ASTBase;
21class ASTBlockDecoded;
22class ASTBlockEncoded;
23class ASTBreak;
24class ASTDoWhile;
25class ASTGoto;
26class ASTIfElse;
27class ASTIfThen;
28class ASTLabel;
29class ASTProgram;
30class ASTReturn;
31class ASTVarSet;
32
33using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
34 ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
35
36using ASTNode = std::shared_ptr<ASTBase>;
37
38enum class ASTZipperType : u32 {
39 Program,
40 IfThen,
41 IfElse,
42 Loop,
43};
44
45class ASTZipper final {
46public:
47 explicit ASTZipper();
48
49 void Init(ASTNode first, ASTNode parent);
50
51 ASTNode GetFirst() const {
52 return first;
53 }
54
55 ASTNode GetLast() const {
56 return last;
57 }
58
59 void PushBack(ASTNode new_node);
60 void PushFront(ASTNode new_node);
61 void InsertAfter(ASTNode new_node, ASTNode at_node);
62 void InsertBefore(ASTNode new_node, ASTNode at_node);
63 void DetachTail(ASTNode node);
64 void DetachSingle(ASTNode node);
65 void DetachSegment(ASTNode start, ASTNode end);
66 void Remove(ASTNode node);
67
68 ASTNode first;
69 ASTNode last;
70};
71
72class ASTProgram {
73public:
74 ASTZipper nodes{};
75};
76
77class ASTIfThen {
78public:
79 explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
80 Expr condition;
81 ASTZipper nodes{};
82};
83
84class ASTIfElse {
85public:
86 ASTZipper nodes{};
87};
88
89class ASTBlockEncoded {
90public:
91 explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {}
92 u32 start;
93 u32 end;
94};
95
96class ASTBlockDecoded {
97public:
98 explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
99 NodeBlock nodes;
100};
101
102class ASTVarSet {
103public:
104 explicit ASTVarSet(u32 index_, Expr condition_)
105 : index{index_}, condition{std::move(condition_)} {}
106
107 u32 index;
108 Expr condition;
109};
110
111class ASTLabel {
112public:
113 explicit ASTLabel(u32 index_) : index{index_} {}
114 u32 index;
115 bool unused{};
116};
117
118class ASTGoto {
119public:
120 explicit ASTGoto(Expr condition_, u32 label_)
121 : condition{std::move(condition_)}, label{label_} {}
122
123 Expr condition;
124 u32 label;
125};
126
127class ASTDoWhile {
128public:
129 explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
130 Expr condition;
131 ASTZipper nodes{};
132};
133
134class ASTReturn {
135public:
136 explicit ASTReturn(Expr condition_, bool kills_)
137 : condition{std::move(condition_)}, kills{kills_} {}
138
139 Expr condition;
140 bool kills;
141};
142
143class ASTBreak {
144public:
145 explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
146 Expr condition;
147};
148
149class ASTBase {
150public:
151 explicit ASTBase(ASTNode parent_, ASTData data_)
152 : data{std::move(data_)}, parent{std::move(parent_)} {}
153
154 template <class U, class... Args>
155 static ASTNode Make(ASTNode parent, Args&&... args) {
156 return std::make_shared<ASTBase>(std::move(parent),
157 ASTData(U(std::forward<Args>(args)...)));
158 }
159
160 void SetParent(ASTNode new_parent) {
161 parent = std::move(new_parent);
162 }
163
164 ASTNode& GetParent() {
165 return parent;
166 }
167
168 const ASTNode& GetParent() const {
169 return parent;
170 }
171
172 u32 GetLevel() const {
173 u32 level = 0;
174 auto next_parent = parent;
175 while (next_parent) {
176 next_parent = next_parent->GetParent();
177 level++;
178 }
179 return level;
180 }
181
182 ASTData* GetInnerData() {
183 return &data;
184 }
185
186 const ASTData* GetInnerData() const {
187 return &data;
188 }
189
190 ASTNode GetNext() const {
191 return next;
192 }
193
194 ASTNode GetPrevious() const {
195 return previous;
196 }
197
198 ASTZipper& GetManager() {
199 return *manager;
200 }
201
202 const ASTZipper& GetManager() const {
203 return *manager;
204 }
205
206 std::optional<u32> GetGotoLabel() const {
207 if (const auto* inner = std::get_if<ASTGoto>(&data)) {
208 return {inner->label};
209 }
210 return std::nullopt;
211 }
212
213 Expr GetGotoCondition() const {
214 if (const auto* inner = std::get_if<ASTGoto>(&data)) {
215 return inner->condition;
216 }
217 return nullptr;
218 }
219
220 void MarkLabelUnused() {
221 if (auto* inner = std::get_if<ASTLabel>(&data)) {
222 inner->unused = true;
223 }
224 }
225
226 bool IsLabelUnused() const {
227 if (const auto* inner = std::get_if<ASTLabel>(&data)) {
228 return inner->unused;
229 }
230 return true;
231 }
232
233 std::optional<u32> GetLabelIndex() const {
234 if (const auto* inner = std::get_if<ASTLabel>(&data)) {
235 return {inner->index};
236 }
237 return std::nullopt;
238 }
239
240 Expr GetIfCondition() const {
241 if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
242 return inner->condition;
243 }
244 return nullptr;
245 }
246
247 void SetGotoCondition(Expr new_condition) {
248 if (auto* inner = std::get_if<ASTGoto>(&data)) {
249 inner->condition = std::move(new_condition);
250 }
251 }
252
253 bool IsIfThen() const {
254 return std::holds_alternative<ASTIfThen>(data);
255 }
256
257 bool IsIfElse() const {
258 return std::holds_alternative<ASTIfElse>(data);
259 }
260
261 bool IsBlockEncoded() const {
262 return std::holds_alternative<ASTBlockEncoded>(data);
263 }
264
265 void TransformBlockEncoded(NodeBlock&& nodes) {
266 data = ASTBlockDecoded(std::move(nodes));
267 }
268
269 bool IsLoop() const {
270 return std::holds_alternative<ASTDoWhile>(data);
271 }
272
273 ASTZipper* GetSubNodes() {
274 if (std::holds_alternative<ASTProgram>(data)) {
275 return &std::get_if<ASTProgram>(&data)->nodes;
276 }
277 if (std::holds_alternative<ASTIfThen>(data)) {
278 return &std::get_if<ASTIfThen>(&data)->nodes;
279 }
280 if (std::holds_alternative<ASTIfElse>(data)) {
281 return &std::get_if<ASTIfElse>(&data)->nodes;
282 }
283 if (std::holds_alternative<ASTDoWhile>(data)) {
284 return &std::get_if<ASTDoWhile>(&data)->nodes;
285 }
286 return nullptr;
287 }
288
289 void Clear() {
290 next.reset();
291 previous.reset();
292 parent.reset();
293 manager = nullptr;
294 }
295
296private:
297 friend class ASTZipper;
298
299 ASTData data;
300 ASTNode parent;
301 ASTNode next;
302 ASTNode previous;
303 ASTZipper* manager{};
304};
305
306class ASTManager final {
307public:
308 explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
309 ~ASTManager();
310
311 ASTManager(const ASTManager& o) = delete;
312 ASTManager& operator=(const ASTManager& other) = delete;
313
314 ASTManager(ASTManager&& other) noexcept = default;
315 ASTManager& operator=(ASTManager&& other) noexcept = default;
316
317 void Init();
318
319 void DeclareLabel(u32 address);
320
321 void InsertLabel(u32 address);
322
323 void InsertGoto(Expr condition, u32 address);
324
325 void InsertBlock(u32 start_address, u32 end_address);
326
327 void InsertReturn(Expr condition, bool kills);
328
329 std::string Print() const;
330
331 void Decompile();
332
333 void ShowCurrentState(std::string_view state) const;
334
335 void SanityCheck() const;
336
337 void Clear();
338
339 bool IsFullyDecompiled() const {
340 if (full_decompile) {
341 return gotos.empty();
342 }
343
344 for (ASTNode goto_node : gotos) {
345 auto label_index = goto_node->GetGotoLabel();
346 if (!label_index) {
347 return false;
348 }
349 ASTNode glabel = labels[*label_index];
350 if (IsBackwardsJump(goto_node, glabel)) {
351 return false;
352 }
353 }
354 return true;
355 }
356
357 ASTNode GetProgram() const {
358 return main_node;
359 }
360
361 u32 GetVariables() const {
362 return variables;
363 }
364
365 const std::vector<ASTNode>& GetLabels() const {
366 return labels;
367 }
368
369private:
370 bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
371
372 bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
373
374 bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
375
376 void EncloseDoWhile(ASTNode goto_node, ASTNode label);
377
378 void EncloseIfThen(ASTNode goto_node, ASTNode label);
379
380 void MoveOutward(ASTNode goto_node);
381
382 u32 NewVariable() {
383 return variables++;
384 }
385
386 bool full_decompile{};
387 bool disable_else_derivation{};
388 std::unordered_map<u32, u32> labels_map{};
389 u32 labels_count{};
390 std::vector<ASTNode> labels{};
391 std::list<ASTNode> gotos{};
392 u32 variables{};
393 ASTProgram* program{};
394 ASTNode main_node{};
395 Expr false_condition{};
396};
397
398} // namespace VideoCommon::Shader
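
ASTZipper in the deleted header is an intrusive doubly linked list over shared ASTNode pointers, and ASTManager builds the program by pushing nodes into the root ASTProgram's zipper. A short sketch against this now-removed API (MakeExpr comes from the equally removed expr.h); the addresses and condition are arbitrary:

// Builds a trivial program the way ASTManager::Init/InsertBlock/InsertReturn did.
ASTNode main_node = ASTBase::Make<ASTProgram>(ASTNode{});
auto* program = std::get_if<ASTProgram>(main_node->GetInnerData());

program->nodes.PushBack(ASTBase::Make<ASTBlockEncoded>(main_node, 0x0, 0x40));
program->nodes.PushBack(
    ASTBase::Make<ASTReturn>(main_node, MakeExpr<ExprBoolean>(true), false)); // "(true) -> exit"
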
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
deleted file mode 100644
index 02adcf9c7..000000000
--- a/src/video_core/shader/async_shaders.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <condition_variable>
6#include <mutex>
7#include <thread>
8#include <vector>
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_base.h"
11#include "video_core/renderer_opengl/gl_shader_cache.h"
12#include "video_core/shader/async_shaders.h"
13
14namespace VideoCommon::Shader {
15
16AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}
17
18AsyncShaders::~AsyncShaders() {
19 KillWorkers();
20}
21
22void AsyncShaders::AllocateWorkers() {
23 // Use at least one thread
24 u32 num_workers = 1;
25
26 // Deduce how many more threads we can use
27 const u32 thread_count = std::thread::hardware_concurrency();
28 if (thread_count >= 8) {
29 // Increase async workers by 1 for every 2 threads >= 8
30 num_workers += 1 + (thread_count - 8) / 2;
31 }
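// Illustrative values for the formula above: 8 hardware threads give 2 async workers,
// 12 threads give 4 and 16 threads give 6 (one base worker plus 1 + (thread_count - 8) / 2).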
32
33 // If the worker count already matches, there is nothing to do
34 if (num_workers == worker_threads.size()) {
35 return;
36 }
37
38 // If workers already exist, clear them
39 if (!worker_threads.empty()) {
40 FreeWorkers();
41 }
42
43 // Create workers
44 for (std::size_t i = 0; i < num_workers; i++) {
45 context_list.push_back(emu_window.CreateSharedContext());
46 worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
47 context_list[i].get());
48 }
49}
50
51void AsyncShaders::FreeWorkers() {
52 // Mark all threads to quit
53 is_thread_exiting.store(true);
54 cv.notify_all();
55 for (auto& thread : worker_threads) {
56 thread.join();
57 }
58 // Clear our shared contexts
59 context_list.clear();
60
61 // Clear our worker threads
62 worker_threads.clear();
63}
64
65void AsyncShaders::KillWorkers() {
66 is_thread_exiting.store(true);
67 cv.notify_all();
68 for (auto& thread : worker_threads) {
69 thread.detach();
70 }
71 // Clear our shared contexts
72 context_list.clear();
73
74 // Clear our worker threads
75 worker_threads.clear();
76}
77
78bool AsyncShaders::HasWorkQueued() const {
79 return !pending_queue.empty();
80}
81
82bool AsyncShaders::HasCompletedWork() const {
83 std::shared_lock lock{completed_mutex};
84 return !finished_work.empty();
85}
86
87bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
88 const auto& regs = gpu.Maxwell3D().regs;
89
90 // If something is using depth, we can assume the game is not rendering something that will
91 // only be used once.
92 if (regs.zeta_enable) {
93 return true;
94 }
95
96 // If games are using a small index count, we can assume these are full-screen quads. Usually
97 // these shaders are only used once for building textures, so we assume they cannot be built
98 // asynchronously.
99 if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
100 return false;
101 }
102
103 return true;
104}
105
106std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
107 std::vector<Result> results;
108 {
109 std::unique_lock lock{completed_mutex};
110 results = std::move(finished_work);
111 finished_work.clear();
112 }
113 return results;
114}
115
116void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
117 Tegra::Engines::ShaderType shader_type, u64 uid,
118 std::vector<u64> code, std::vector<u64> code_b,
119 u32 main_offset, CompilerSettings compiler_settings,
120 const Registry& registry, VAddr cpu_addr) {
121 std::unique_lock lock(queue_mutex);
122 pending_queue.push({
123 .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
124 .device = &device,
125 .shader_type = shader_type,
126 .uid = uid,
127 .code = std::move(code),
128 .code_b = std::move(code_b),
129 .main_offset = main_offset,
130 .compiler_settings = compiler_settings,
131 .registry = registry,
132 .cpu_address = cpu_addr,
133 .pp_cache = nullptr,
134 .vk_device = nullptr,
135 .scheduler = nullptr,
136 .descriptor_pool = nullptr,
137 .update_descriptor_queue = nullptr,
138 .bindings{},
139 .program{},
140 .key{},
141 .num_color_buffers = 0,
142 });
143 cv.notify_one();
144}
145
146void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
147 const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
148 Vulkan::VKDescriptorPool& descriptor_pool,
149 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
150 std::vector<VkDescriptorSetLayoutBinding> bindings,
151 Vulkan::SPIRVProgram program,
152 Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
153 std::unique_lock lock(queue_mutex);
154 pending_queue.push({
155 .backend = Backend::Vulkan,
156 .device = nullptr,
157 .shader_type{},
158 .uid = 0,
159 .code{},
160 .code_b{},
161 .main_offset = 0,
162 .compiler_settings{},
163 .registry{},
164 .cpu_address = 0,
165 .pp_cache = pp_cache,
166 .vk_device = &device,
167 .scheduler = &scheduler,
168 .descriptor_pool = &descriptor_pool,
169 .update_descriptor_queue = &update_descriptor_queue,
170 .bindings = std::move(bindings),
171 .program = std::move(program),
172 .key = key,
173 .num_color_buffers = num_color_buffers,
174 });
175 cv.notify_one();
176}
177
178void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
179 while (!is_thread_exiting.load(std::memory_order_relaxed)) {
180 std::unique_lock lock{queue_mutex};
181 cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
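// The predicate form of wait() re-checks the condition while queue_mutex is held, so
// spurious wakeups and the exit flag are both handled before any work is dequeued.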
182 if (is_thread_exiting) {
183 return;
184 }
185
186 // Re-check that there is still work to take before touching the queue
187 if (!HasWorkQueued()) {
188 continue;
189 }
190 // Another thread beat us, just unlock and wait for the next load
191 if (pending_queue.empty()) {
192 continue;
193 }
194
195 // Pull work from queue
196 WorkerParams work = std::move(pending_queue.front());
197 pending_queue.pop();
198 lock.unlock();
199
200 if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
201 const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
202 const auto scope = context->Acquire();
203 auto program =
204 OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
205 Result result{};
206 result.backend = work.backend;
207 result.cpu_address = work.cpu_address;
208 result.uid = work.uid;
209 result.code = std::move(work.code);
210 result.code_b = std::move(work.code_b);
211 result.shader_type = work.shader_type;
212
213 if (work.backend == Backend::OpenGL) {
214 result.program.opengl = std::move(program->source_program);
215 } else if (work.backend == Backend::GLASM) {
216 result.program.glasm = std::move(program->assembly_program);
217 }
218
219 {
220 std::unique_lock complete_lock(completed_mutex);
221 finished_work.push_back(std::move(result));
222 }
223 } else if (work.backend == Backend::Vulkan) {
224 auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
225 *work.vk_device, *work.scheduler, *work.descriptor_pool,
226 *work.update_descriptor_queue, work.key, work.bindings, work.program,
227 work.num_color_buffers);
228
229 work.pp_cache->EmplacePipeline(std::move(pipeline));
230 }
231 }
232}
233
234} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
deleted file mode 100644
index 7fdff6e56..000000000
--- a/src/video_core/shader/async_shaders.h
+++ /dev/null
@@ -1,138 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <condition_variable>
8#include <memory>
9#include <shared_mutex>
10#include <thread>
11
12#include <glad/glad.h>
13
14#include "common/common_types.h"
15#include "video_core/renderer_opengl/gl_device.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
17#include "video_core/renderer_opengl/gl_shader_decompiler.h"
18#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
19#include "video_core/renderer_vulkan/vk_scheduler.h"
20#include "video_core/vulkan_common/vulkan_device.h"
21
22namespace Core::Frontend {
23class EmuWindow;
24class GraphicsContext;
25} // namespace Core::Frontend
26
27namespace Tegra {
28class GPU;
29}
30
31namespace Vulkan {
32class VKPipelineCache;
33}
34
35namespace VideoCommon::Shader {
36
37class AsyncShaders {
38public:
39 enum class Backend {
40 OpenGL,
41 GLASM,
42 Vulkan,
43 };
44
45 struct ResultPrograms {
46 OpenGL::OGLProgram opengl;
47 OpenGL::OGLAssemblyProgram glasm;
48 };
49
50 struct Result {
51 u64 uid;
52 VAddr cpu_address;
53 Backend backend;
54 ResultPrograms program;
55 std::vector<u64> code;
56 std::vector<u64> code_b;
57 Tegra::Engines::ShaderType shader_type;
58 };
59
60 explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
61 ~AsyncShaders();
62
63 /// Start up shader worker threads
64 void AllocateWorkers();
65
66 /// Clear the shader queue and kill all worker threads
67 void FreeWorkers();
68
69 // Force end all threads
70 void KillWorkers();
71
72 /// Check to see if any shaders have actually been compiled
73 [[nodiscard]] bool HasCompletedWork() const;
74
75 /// Deduce if a shader can be built on another thread or MUST be built synchronously. We cannot
76 /// build every shader async as some shaders are only built and executed once. We try to "guess"
77 /// which shaders will only be used once
78 [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
79
80 /// Pulls completed compiled shaders
81 [[nodiscard]] std::vector<Result> GetCompletedWork();
82
83 void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
84 u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
85 CompilerSettings compiler_settings, const Registry& registry,
86 VAddr cpu_addr);
87
88 void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
89 Vulkan::VKScheduler& scheduler,
90 Vulkan::VKDescriptorPool& descriptor_pool,
91 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
92 std::vector<VkDescriptorSetLayoutBinding> bindings,
93 Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
94 u32 num_color_buffers);
95
96private:
97 void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
98
99 /// Check our worker queue to see if we have any work queued already
100 [[nodiscard]] bool HasWorkQueued() const;
101
102 struct WorkerParams {
103 Backend backend;
104 // For OGL
105 const OpenGL::Device* device;
106 Tegra::Engines::ShaderType shader_type;
107 u64 uid;
108 std::vector<u64> code;
109 std::vector<u64> code_b;
110 u32 main_offset;
111 CompilerSettings compiler_settings;
112 std::optional<Registry> registry;
113 VAddr cpu_address;
114
115 // For Vulkan
116 Vulkan::VKPipelineCache* pp_cache;
117 const Vulkan::Device* vk_device;
118 Vulkan::VKScheduler* scheduler;
119 Vulkan::VKDescriptorPool* descriptor_pool;
120 Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
121 std::vector<VkDescriptorSetLayoutBinding> bindings;
122 Vulkan::SPIRVProgram program;
123 Vulkan::GraphicsPipelineCacheKey key;
124 u32 num_color_buffers;
125 };
126
127 std::condition_variable cv;
128 mutable std::mutex queue_mutex;
129 mutable std::shared_mutex completed_mutex;
130 std::atomic<bool> is_thread_exiting{};
131 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
132 std::vector<std::thread> worker_threads;
133 std::queue<WorkerParams> pending_queue;
134 std::vector<Result> finished_work;
135 Core::Frontend::EmuWindow& emu_window;
136};
137
138} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
deleted file mode 100644
index cddcbd4f0..000000000
--- a/src/video_core/shader/compiler_settings.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/shader/compiler_settings.h"
6
7namespace VideoCommon::Shader {
8
9std::string CompileDepthAsString(const CompileDepth cd) {
10 switch (cd) {
11 case CompileDepth::BruteForce:
12 return "Brute Force Compile";
13 case CompileDepth::FlowStack:
14 return "Simple Flow Stack Mode";
15 case CompileDepth::NoFlowStack:
16 return "Remove Flow Stack";
17 case CompileDepth::DecompileBackwards:
18 return "Decompile Backward Jumps";
19 case CompileDepth::FullDecompile:
20 return "Full Decompilation";
21 default:
22 return "Unknown Compiler Process";
23 }
24}
25
26} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
deleted file mode 100644
index 916018c01..000000000
--- a/src/video_core/shader/compiler_settings.h
+++ /dev/null
@@ -1,26 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/engines/shader_bytecode.h"
8
9namespace VideoCommon::Shader {
10
11enum class CompileDepth : u32 {
12 BruteForce = 0,
13 FlowStack = 1,
14 NoFlowStack = 2,
15 DecompileBackwards = 3,
16 FullDecompile = 4,
17};
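// These depths form a fallback ladder: control-flow scanning downgrades to a weaker
// mode (ultimately BruteForce) whenever reconstruction at the requested depth fails,
// and the decoder logs a warning when such a downgrade happens.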
18
19std::string CompileDepthAsString(CompileDepth cd);
20
21struct CompilerSettings {
22 CompileDepth depth{CompileDepth::NoFlowStack};
23 bool disable_else_derivation{true};
24};
25
26} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
deleted file mode 100644
index 43d965f2f..000000000
--- a/src/video_core/shader/control_flow.cpp
+++ /dev/null
@@ -1,751 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <list>
6#include <map>
7#include <set>
8#include <stack>
9#include <unordered_map>
10#include <vector>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/shader/ast.h"
15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/memory_util.h"
17#include "video_core/shader/registry.h"
18#include "video_core/shader/shader_ir.h"
19
20namespace VideoCommon::Shader {
21
22namespace {
23
24using Tegra::Shader::Instruction;
25using Tegra::Shader::OpCode;
26
27constexpr s32 unassigned_branch = -2;
28
29struct Query {
30 u32 address{};
31 std::stack<u32> ssy_stack{};
32 std::stack<u32> pbk_stack{};
33};
34
35struct BlockStack {
36 BlockStack() = default;
37 explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
38 std::stack<u32> ssy_stack{};
39 std::stack<u32> pbk_stack{};
40};
41
42template <typename T, typename... Args>
43BlockBranchInfo MakeBranchInfo(Args&&... args) {
44 static_assert(std::is_convertible_v<T, BranchData>);
45 return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
46}
47
48bool BlockBranchIsIgnored(BlockBranchInfo first) {
49 bool ignore = false;
50 if (std::holds_alternative<SingleBranch>(*first)) {
51 const auto branch = std::get_if<SingleBranch>(first.get());
52 ignore = branch->ignore;
53 }
54 return ignore;
55}
56
57struct BlockInfo {
58 u32 start{};
59 u32 end{};
60 bool visited{};
61 BlockBranchInfo branch{};
62
63 bool IsInside(const u32 address) const {
64 return start <= address && address <= end;
65 }
66};
67
68struct CFGRebuildState {
69 explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
70 : program_code{program_code_}, registry{registry_}, start{start_} {}
71
72 const ProgramCode& program_code;
73 Registry& registry;
74 u32 start{};
75 std::vector<BlockInfo> block_info;
76 std::list<u32> inspect_queries;
77 std::list<Query> queries;
78 std::unordered_map<u32, u32> registered;
79 std::set<u32> labels;
80 std::map<u32, u32> ssy_labels;
81 std::map<u32, u32> pbk_labels;
82 std::unordered_map<u32, BlockStack> stacks;
83 ASTManager* manager{};
84};
85
86enum class BlockCollision : u32 { None, Found, Inside };
87
88std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
89 const auto& blocks = state.block_info;
90 for (u32 index = 0; index < blocks.size(); index++) {
91 if (blocks[index].start == address) {
92 return {BlockCollision::Found, index};
93 }
94 if (blocks[index].IsInside(address)) {
95 return {BlockCollision::Inside, index};
96 }
97 }
98 return {BlockCollision::None, 0xFFFFFFFF};
99}
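// "Found" means a block already starts at the queried address, while "Inside" means the
// address falls in the middle of an existing block; TryInspectAddress later splits such
// a block in two at that address.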
100
101struct ParseInfo {
102 BlockBranchInfo branch_info{};
103 u32 end_address{};
104};
105
106BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
107 auto& it = state.block_info.emplace_back();
108 it.start = start;
109 it.end = end;
110 const u32 index = static_cast<u32>(state.block_info.size() - 1);
111 state.registered.insert({start, index});
112 return it;
113}
114
115Pred GetPredicate(u32 index, bool negated) {
116 return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
117}
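// Indices 0-7 name the raw predicate registers; adding 8 encodes the negated form.
// For example, predicate 3 negated becomes 11, which InsertBranch later decodes by
// testing pred > 7 and subtracting 8.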
118
119enum class ParseResult : u32 {
120 ControlCaught,
121 BlockEnd,
122 AbnormalFlow,
123};
124
125struct BranchIndirectInfo {
126 u32 buffer{};
127 u32 offset{};
128 u32 entries{};
129 s32 relative_position{};
130};
131
132struct BufferInfo {
133 u32 index;
134 u32 offset;
135};
136
137std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
138 const Instruction instr = state.program_code[pos];
139 const auto opcode = OpCode::Decode(instr);
140 if (opcode->get().GetId() != OpCode::Id::BRX) {
141 return std::nullopt;
142 }
143 if (instr.brx.constant_buffer != 0) {
144 return std::nullopt;
145 }
146 --pos;
147 return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
148}
149
150template <typename Result, typename TestCallable, typename PackCallable>
151// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
152// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
153std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
154 PackCallable pack) {
155 for (; pos >= state.start; --pos) {
156 if (IsSchedInstruction(pos, state.start)) {
157 continue;
158 }
159 const Instruction instr = state.program_code[pos];
160 const auto opcode = OpCode::Decode(instr);
161 if (!opcode) {
162 continue;
163 }
164 if (test(instr, opcode->get())) {
165 --pos;
166 return std::make_optional(pack(instr, opcode->get()));
167 }
168 }
169 return std::nullopt;
170}
171
172std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
173 u64 brx_tracked_register) {
174 return TrackInstruction<std::pair<BufferInfo, u64>>(
175 state, pos,
176 [brx_tracked_register](auto instr, const auto& opcode) {
177 return opcode.GetId() == OpCode::Id::LD_C &&
178 instr.gpr0.Value() == brx_tracked_register &&
179 instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
180 },
181 [](auto instr, const auto& opcode) {
182 const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
183 static_cast<u32>(instr.cbuf36.GetOffset())};
184 return std::make_pair(info, instr.gpr8.Value());
185 });
186}
187
188std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
189 u64 ldc_tracked_register) {
190 return TrackInstruction<u64>(
191 state, pos,
192 [ldc_tracked_register](auto instr, const auto& opcode) {
193 return opcode.GetId() == OpCode::Id::SHL_IMM &&
194 instr.gpr0.Value() == ldc_tracked_register;
195 },
196 [](auto instr, const auto&) { return instr.gpr8.Value(); });
197}
198
199std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
200 u64 shl_tracked_register) {
201 return TrackInstruction<u32>(
202 state, pos,
203 [shl_tracked_register](auto instr, const auto& opcode) {
204 return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
205 instr.gpr0.Value() == shl_tracked_register;
206 },
207 [](auto instr, const auto&) {
208 return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
209 });
210}
211
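// Walks backwards from a BRX (indirect branch) to recover the jump-table pattern: the
// register BRX reads was loaded by an LD_C from a constant buffer, the LD_C address came
// from an SHL_IMM, and an IMNMX_IMM clamp on the shifted index gives the entry count.
// If any link in that chain is missing, the caller treats the branch as abnormal flow.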
212std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
213 const auto brx_info = GetBRXInfo(state, pos);
214 if (!brx_info) {
215 return std::nullopt;
216 }
217 const auto [relative_position, brx_tracked_register] = *brx_info;
218
219 const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
220 if (!ldc_info) {
221 return std::nullopt;
222 }
223 const auto [buffer_info, ldc_tracked_register] = *ldc_info;
224
225 const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
226 if (!shl_tracked_register) {
227 return std::nullopt;
228 }
229
230 const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
231 if (!entries) {
232 return std::nullopt;
233 }
234
235 return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
236}
237
238std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
239 u32 offset = static_cast<u32>(address);
240 const u32 end_address = static_cast<u32>(state.program_code.size());
241 ParseInfo parse_info{};
242 SingleBranch single_branch{};
243
244 const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) {
245 const auto pair = rebuild_state.labels.emplace(label_address);
246 if (pair.second) {
247 rebuild_state.inspect_queries.push_back(label_address);
248 }
249 };
250
251 while (true) {
252 if (offset >= end_address) {
253 // ASSERT_OR_EXECUTE can't be used, as it ignores the break
254 ASSERT_MSG(false, "Shader passed the current limit!");
255
256 single_branch.address = exit_branch;
257 single_branch.ignore = false;
258 break;
259 }
260 if (state.registered.contains(offset)) {
261 single_branch.address = offset;
262 single_branch.ignore = true;
263 break;
264 }
265 if (IsSchedInstruction(offset, state.start)) {
266 offset++;
267 continue;
268 }
269 const Instruction instr = {state.program_code[offset]};
270 const auto opcode = OpCode::Decode(instr);
271 if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
272 offset++;
273 continue;
274 }
275
276 switch (opcode->get().GetId()) {
277 case OpCode::Id::EXIT: {
278 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
279 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
280 if (single_branch.condition.predicate == Pred::NeverExecute) {
281 offset++;
282 continue;
283 }
284 const ConditionCode cc = instr.flow_condition_code;
285 single_branch.condition.cc = cc;
286 if (cc == ConditionCode::F) {
287 offset++;
288 continue;
289 }
290 single_branch.address = exit_branch;
291 single_branch.kill = false;
292 single_branch.is_sync = false;
293 single_branch.is_brk = false;
294 single_branch.ignore = false;
295 parse_info.end_address = offset;
296 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
297 single_branch.condition, single_branch.address, single_branch.kill,
298 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
299
300 return {ParseResult::ControlCaught, parse_info};
301 }
302 case OpCode::Id::BRA: {
303 if (instr.bra.constant_buffer != 0) {
304 return {ParseResult::AbnormalFlow, parse_info};
305 }
306 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
307 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
308 if (single_branch.condition.predicate == Pred::NeverExecute) {
309 offset++;
310 continue;
311 }
312 const ConditionCode cc = instr.flow_condition_code;
313 single_branch.condition.cc = cc;
314 if (cc == ConditionCode::F) {
315 offset++;
316 continue;
317 }
318 const u32 branch_offset = offset + instr.bra.GetBranchTarget();
319 if (branch_offset == 0) {
320 single_branch.address = exit_branch;
321 } else {
322 single_branch.address = branch_offset;
323 }
324 insert_label(state, branch_offset);
325 single_branch.kill = false;
326 single_branch.is_sync = false;
327 single_branch.is_brk = false;
328 single_branch.ignore = false;
329 parse_info.end_address = offset;
330 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
331 single_branch.condition, single_branch.address, single_branch.kill,
332 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
333
334 return {ParseResult::ControlCaught, parse_info};
335 }
336 case OpCode::Id::SYNC: {
337 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
338 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
339 if (single_branch.condition.predicate == Pred::NeverExecute) {
340 offset++;
341 continue;
342 }
343 const ConditionCode cc = instr.flow_condition_code;
344 single_branch.condition.cc = cc;
345 if (cc == ConditionCode::F) {
346 offset++;
347 continue;
348 }
349 single_branch.address = unassigned_branch;
350 single_branch.kill = false;
351 single_branch.is_sync = true;
352 single_branch.is_brk = false;
353 single_branch.ignore = false;
354 parse_info.end_address = offset;
355 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
356 single_branch.condition, single_branch.address, single_branch.kill,
357 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
358
359 return {ParseResult::ControlCaught, parse_info};
360 }
361 case OpCode::Id::BRK: {
362 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
363 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
364 if (single_branch.condition.predicate == Pred::NeverExecute) {
365 offset++;
366 continue;
367 }
368 const ConditionCode cc = instr.flow_condition_code;
369 single_branch.condition.cc = cc;
370 if (cc == ConditionCode::F) {
371 offset++;
372 continue;
373 }
374 single_branch.address = unassigned_branch;
375 single_branch.kill = false;
376 single_branch.is_sync = false;
377 single_branch.is_brk = true;
378 single_branch.ignore = false;
379 parse_info.end_address = offset;
380 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
381 single_branch.condition, single_branch.address, single_branch.kill,
382 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
383
384 return {ParseResult::ControlCaught, parse_info};
385 }
386 case OpCode::Id::KIL: {
387 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
388 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
389 if (single_branch.condition.predicate == Pred::NeverExecute) {
390 offset++;
391 continue;
392 }
393 const ConditionCode cc = instr.flow_condition_code;
394 single_branch.condition.cc = cc;
395 if (cc == ConditionCode::F) {
396 offset++;
397 continue;
398 }
399 single_branch.address = exit_branch;
400 single_branch.kill = true;
401 single_branch.is_sync = false;
402 single_branch.is_brk = false;
403 single_branch.ignore = false;
404 parse_info.end_address = offset;
405 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
406 single_branch.condition, single_branch.address, single_branch.kill,
407 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
408
409 return {ParseResult::ControlCaught, parse_info};
410 }
411 case OpCode::Id::SSY: {
412 const u32 target = offset + instr.bra.GetBranchTarget();
413 insert_label(state, target);
414 state.ssy_labels.emplace(offset, target);
415 break;
416 }
417 case OpCode::Id::PBK: {
418 const u32 target = offset + instr.bra.GetBranchTarget();
419 insert_label(state, target);
420 state.pbk_labels.emplace(offset, target);
421 break;
422 }
423 case OpCode::Id::BRX: {
424 const auto tmp = TrackBranchIndirectInfo(state, offset);
425 if (!tmp) {
426 LOG_WARNING(HW_GPU, "BRX Track Unsuccessful");
427 return {ParseResult::AbnormalFlow, parse_info};
428 }
429
430 const auto result = *tmp;
431 const s32 pc_target = offset + result.relative_position;
432 std::vector<CaseBranch> branches;
433 for (u32 i = 0; i < result.entries; i++) {
434 auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
435 if (!key) {
436 return {ParseResult::AbnormalFlow, parse_info};
437 }
438 u32 value = *key;
439 u32 target = static_cast<u32>((value >> 3) + pc_target);
440 insert_label(state, target);
441 branches.emplace_back(value, target);
442 }
443 parse_info.end_address = offset;
444 parse_info.branch_info = MakeBranchInfo<MultiBranch>(
445 static_cast<u32>(instr.gpr8.Value()), std::move(branches));
446
447 return {ParseResult::ControlCaught, parse_info};
448 }
449 default:
450 break;
451 }
452
453 offset++;
454 }
455 single_branch.kill = false;
456 single_branch.is_sync = false;
457 single_branch.is_brk = false;
458 parse_info.end_address = offset - 1;
459 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
460 single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
461 single_branch.is_brk, single_branch.ignore);
462 return {ParseResult::BlockEnd, parse_info};
463}
464
465bool TryInspectAddress(CFGRebuildState& state) {
466 if (state.inspect_queries.empty()) {
467 return false;
468 }
469
470 const u32 address = state.inspect_queries.front();
471 state.inspect_queries.pop_front();
472 const auto [result, block_index] = TryGetBlock(state, address);
473 switch (result) {
474 case BlockCollision::Found: {
475 return true;
476 }
477 case BlockCollision::Inside: {
478 // This case is the tricky one:
479 // We need to split the block into 2 separate blocks
480 const u32 end = state.block_info[block_index].end;
481 BlockInfo& new_block = CreateBlockInfo(state, address, end);
482 BlockInfo& current_block = state.block_info[block_index];
483 current_block.end = address - 1;
484 new_block.branch = std::move(current_block.branch);
485 BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
486 const auto branch = std::get_if<SingleBranch>(forward_branch.get());
487 branch->address = address;
488 branch->ignore = true;
489 current_block.branch = std::move(forward_branch);
490 return true;
491 }
492 default:
493 break;
494 }
495 const auto [parse_result, parse_info] = ParseCode(state, address);
496 if (parse_result == ParseResult::AbnormalFlow) {
497 // If it's AbnormalFlow, return false to abort the CFG reconstruction
498 return false;
499 }
500
501 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
502 block_info.branch = parse_info.branch_info;
503 if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
504 const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
505 if (branch->condition.IsUnconditional()) {
506 return true;
507 }
508 const u32 fallthrough_address = parse_info.end_address + 1;
509 state.inspect_queries.push_front(fallthrough_address);
510 return true;
511 }
512 return true;
513}
514
515bool TryQuery(CFGRebuildState& state) {
516 const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
517 BlockInfo& block) {
518 auto gather_start = labels.lower_bound(block.start);
519 const auto gather_end = labels.upper_bound(block.end);
520 while (gather_start != gather_end) {
521 cc.push(gather_start->second);
522 ++gather_start;
523 }
524 };
525 if (state.queries.empty()) {
526 return false;
527 }
528
529 Query& q = state.queries.front();
530 const u32 block_index = state.registered[q.address];
531 BlockInfo& block = state.block_info[block_index];
532 // If the block was already visited, check that the stacks match; otherwise gather the
533 // ssy/pbk labels into the current stack and check whether the branch at the end of the
534 // block consumes a label. Schedule new queries accordingly.
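// gather_labels pushes every SSY/PBK target declared inside the block onto the query's
// stacks; when the block ends in a sync or brk branch with an unassigned address, the
// top of the corresponding stack supplies the branch target and is popped.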
535 if (block.visited) {
536 BlockStack& stack = state.stacks[q.address];
537 const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
538 (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
539 state.queries.pop_front();
540 return all_okay;
541 }
542 block.visited = true;
543 state.stacks.insert_or_assign(q.address, BlockStack{q});
544
545 Query q2(q);
546 state.queries.pop_front();
547 gather_labels(q2.ssy_stack, state.ssy_labels, block);
548 gather_labels(q2.pbk_stack, state.pbk_labels, block);
549 if (std::holds_alternative<SingleBranch>(*block.branch)) {
550 auto* branch = std::get_if<SingleBranch>(block.branch.get());
551 if (!branch->condition.IsUnconditional()) {
552 q2.address = block.end + 1;
553 state.queries.push_back(q2);
554 }
555
556 auto& conditional_query = state.queries.emplace_back(q2);
557 if (branch->is_sync) {
558 if (branch->address == unassigned_branch) {
559 branch->address = conditional_query.ssy_stack.top();
560 }
561 conditional_query.ssy_stack.pop();
562 }
563 if (branch->is_brk) {
564 if (branch->address == unassigned_branch) {
565 branch->address = conditional_query.pbk_stack.top();
566 }
567 conditional_query.pbk_stack.pop();
568 }
569 conditional_query.address = branch->address;
570 return true;
571 }
572
573 const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
574 for (const auto& branch_case : multi_branch->branches) {
575 auto& conditional_query = state.queries.emplace_back(q2);
576 conditional_query.address = branch_case.address;
577 }
578
579 return true;
580}
581
582void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
583 const auto get_expr = [](const Condition& cond) -> Expr {
584 Expr result;
585 if (cond.cc != ConditionCode::T) {
586 result = MakeExpr<ExprCondCode>(cond.cc);
587 }
588 if (cond.predicate != Pred::UnusedIndex) {
589 u32 pred = static_cast<u32>(cond.predicate);
590 bool negate = false;
591 if (pred > 7) {
592 negate = true;
593 pred -= 8;
594 }
595 Expr extra = MakeExpr<ExprPredicate>(pred);
596 if (negate) {
597 extra = MakeExpr<ExprNot>(std::move(extra));
598 }
599 if (result) {
600 return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
601 }
602 return extra;
603 }
604 if (result) {
605 return result;
606 }
607 return MakeExpr<ExprBoolean>(true);
608 };
609
610 if (std::holds_alternative<SingleBranch>(*branch_info)) {
611 const auto* branch = std::get_if<SingleBranch>(branch_info.get());
612 if (branch->address < 0) {
613 if (branch->kill) {
614 mm.InsertReturn(get_expr(branch->condition), true);
615 return;
616 }
617 mm.InsertReturn(get_expr(branch->condition), false);
618 return;
619 }
620 mm.InsertGoto(get_expr(branch->condition), branch->address);
621 return;
622 }
623 const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
624 for (const auto& branch_case : multi_branch->branches) {
625 mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
626 branch_case.address);
627 }
628}
629
630void DecompileShader(CFGRebuildState& state) {
631 state.manager->Init();
632 for (auto label : state.labels) {
633 state.manager->DeclareLabel(label);
634 }
635 for (const auto& block : state.block_info) {
636 if (state.labels.contains(block.start)) {
637 state.manager->InsertLabel(block.start);
638 }
639 const bool ignore = BlockBranchIsIgnored(block.branch);
640 const u32 end = ignore ? block.end + 1 : block.end;
641 state.manager->InsertBlock(block.start, end);
642 if (!ignore) {
643 InsertBranch(*state.manager, block.branch);
644 }
645 }
646 state.manager->Decompile();
647}
648
649} // Anonymous namespace
650
651std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
652 const CompilerSettings& settings,
653 Registry& registry) {
654 auto result_out = std::make_unique<ShaderCharacteristics>();
655 if (settings.depth == CompileDepth::BruteForce) {
656 result_out->settings.depth = CompileDepth::BruteForce;
657 return result_out;
658 }
659
660 CFGRebuildState state{program_code, start_address, registry};
661 // Inspect Code and generate blocks
662 state.labels.clear();
663 state.labels.emplace(start_address);
664 state.inspect_queries.push_back(state.start);
665 while (!state.inspect_queries.empty()) {
666 if (!TryInspectAddress(state)) {
667 result_out->settings.depth = CompileDepth::BruteForce;
668 return result_out;
669 }
670 }
671
672 bool use_flow_stack = true;
673
674 bool decompiled = false;
675
676 if (settings.depth != CompileDepth::FlowStack) {
677 // Decompile Stacks
678 state.queries.push_back(Query{state.start, {}, {}});
679 decompiled = true;
680 while (!state.queries.empty()) {
681 if (!TryQuery(state)) {
682 decompiled = false;
683 break;
684 }
685 }
686 }
687
688 use_flow_stack = !decompiled;
689
690 // Sort and organize results
691 std::sort(state.block_info.begin(), state.block_info.end(),
692 [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
693 if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
694 ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
695 settings.disable_else_derivation};
696 state.manager = &manager;
697 DecompileShader(state);
698 decompiled = state.manager->IsFullyDecompiled();
699 if (!decompiled) {
700 if (settings.depth == CompileDepth::FullDecompile) {
701 LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
702 } else {
703 LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
704 }
705 state.manager->ShowCurrentState("Of Shader");
706 state.manager->Clear();
707 } else {
708 auto characteristics = std::make_unique<ShaderCharacteristics>();
709 characteristics->start = start_address;
710 characteristics->settings.depth = settings.depth;
711 characteristics->manager = std::move(manager);
712 characteristics->end = state.block_info.back().end + 1;
713 return characteristics;
714 }
715 }
716
717 result_out->start = start_address;
718 result_out->settings.depth =
719 use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
720 result_out->blocks.clear();
721 for (auto& block : state.block_info) {
722 ShaderBlock new_block{};
723 new_block.start = block.start;
724 new_block.end = block.end;
725 new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
726 if (!new_block.ignore_branch) {
727 new_block.branch = block.branch;
728 }
729 result_out->end = std::max(result_out->end, block.end);
730 result_out->blocks.push_back(new_block);
731 }
732 if (!use_flow_stack) {
733 result_out->labels = std::move(state.labels);
734 return result_out;
735 }
736
737 auto back = result_out->blocks.begin();
738 auto next = std::next(back);
739 while (next != result_out->blocks.end()) {
740 if (!state.labels.contains(next->start) && next->start == back->end + 1) {
741 back->end = next->end;
742 next = result_out->blocks.erase(next);
743 continue;
744 }
745 back = next;
746 ++next;
747 }
748
749 return result_out;
750}
751} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
deleted file mode 100644
index 37bf96492..000000000
--- a/src/video_core/shader/control_flow.h
+++ /dev/null
@@ -1,117 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <list>
8#include <optional>
9#include <set>
10#include <variant>
11
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/ast.h"
14#include "video_core/shader/compiler_settings.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::ConditionCode;
21using Tegra::Shader::Pred;
22
23constexpr s32 exit_branch = -1;
24
25struct Condition {
26 Pred predicate{Pred::UnusedIndex};
27 ConditionCode cc{ConditionCode::T};
28
29 bool IsUnconditional() const {
30 return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
31 }
32
33 bool operator==(const Condition& other) const {
34 return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
35 }
36
37 bool operator!=(const Condition& other) const {
38 return !operator==(other);
39 }
40};
41
42class SingleBranch {
43public:
44 SingleBranch() = default;
45 explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_,
46 bool is_brk_, bool ignore_)
47 : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_},
48 ignore{ignore_} {}
49
50 bool operator==(const SingleBranch& b) const {
51 return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
52 std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
53 }
54
55 bool operator!=(const SingleBranch& b) const {
56 return !operator==(b);
57 }
58
59 Condition condition{};
60 s32 address{exit_branch};
61 bool kill{};
62 bool is_sync{};
63 bool is_brk{};
64 bool ignore{};
65};
66
67struct CaseBranch {
68 explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {}
69 u32 cmp_value;
70 u32 address;
71};
72
73class MultiBranch {
74public:
75 explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_)
76 : gpr{gpr_}, branches{std::move(branches_)} {}
77
78 u32 gpr{};
79 std::vector<CaseBranch> branches{};
80};
81
82using BranchData = std::variant<SingleBranch, MultiBranch>;
83using BlockBranchInfo = std::shared_ptr<BranchData>;
84
85bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
86
87struct ShaderBlock {
88 u32 start{};
89 u32 end{};
90 bool ignore_branch{};
91 BlockBranchInfo branch{};
92
93 bool operator==(const ShaderBlock& sb) const {
94 return std::tie(start, end, ignore_branch) ==
95 std::tie(sb.start, sb.end, sb.ignore_branch) &&
96 BlockBranchInfoAreEqual(branch, sb.branch);
97 }
98
99 bool operator!=(const ShaderBlock& sb) const {
100 return !operator==(sb);
101 }
102};
103
104struct ShaderCharacteristics {
105 std::list<ShaderBlock> blocks{};
106 std::set<u32> labels{};
107 u32 start{};
108 u32 end{};
109 ASTManager manager{true, true};
110 CompilerSettings settings{};
111};
112
113std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
114 const CompilerSettings& settings,
115 Registry& registry);
116
117} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
deleted file mode 100644
index 6576d1208..000000000
--- a/src/video_core/shader/decode.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <limits>
7#include <set>
8
9#include <fmt/format.h>
10
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/engines/shader_header.h"
15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/memory_util.h"
17#include "video_core/shader/node_helper.h"
18#include "video_core/shader/shader_ir.h"
19
20namespace VideoCommon::Shader {
21
22using Tegra::Shader::Instruction;
23using Tegra::Shader::OpCode;
24
25namespace {
26
27void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
28 const std::list<SamplerEntry>& used_samplers) {
29 if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
30 return;
31 }
32 u32 count{};
33 std::vector<u32> bound_offsets;
34 for (const auto& sampler : used_samplers) {
35 if (sampler.is_bindless) {
36 continue;
37 }
38 ++count;
39 bound_offsets.emplace_back(sampler.offset);
40 }
41 if (count > 1) {
42 gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
43 }
44}
45
46std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
47 VideoCore::GuestDriverProfile& gpu_driver,
48 const std::list<SamplerEntry>& used_samplers) {
49 const u32 base_offset = sampler_to_deduce.offset;
50 u32 max_offset{std::numeric_limits<u32>::max()};
51 for (const auto& sampler : used_samplers) {
52 if (sampler.is_bindless) {
53 continue;
54 }
55 if (sampler.offset > base_offset) {
56 max_offset = std::min(sampler.offset, max_offset);
57 }
58 }
59 if (max_offset == std::numeric_limits<u32>::max()) {
60 return std::nullopt;
61 }
62 return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
63}
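// Illustrative numbers only: if the closest larger bound offset is 8 words past the base
// and the driver reports a texture handler size of 8 bytes, the deduced indexed-sampler
// size is (8 * 4) / 8 = 4.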
64
65} // Anonymous namespace
66
67class ASTDecoder {
68public:
69 explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
70
71 void operator()(ASTProgram& ast) {
72 ASTNode current = ast.nodes.GetFirst();
73 while (current) {
74 Visit(current);
75 current = current->GetNext();
76 }
77 }
78
79 void operator()(ASTIfThen& ast) {
80 ASTNode current = ast.nodes.GetFirst();
81 while (current) {
82 Visit(current);
83 current = current->GetNext();
84 }
85 }
86
87 void operator()(ASTIfElse& ast) {
88 ASTNode current = ast.nodes.GetFirst();
89 while (current) {
90 Visit(current);
91 current = current->GetNext();
92 }
93 }
94
95 void operator()(ASTBlockEncoded& ast) {}
96
97 void operator()(ASTBlockDecoded& ast) {}
98
99 void operator()(ASTVarSet& ast) {}
100
101 void operator()(ASTLabel& ast) {}
102
103 void operator()(ASTGoto& ast) {}
104
105 void operator()(ASTDoWhile& ast) {
106 ASTNode current = ast.nodes.GetFirst();
107 while (current) {
108 Visit(current);
109 current = current->GetNext();
110 }
111 }
112
113 void operator()(ASTReturn& ast) {}
114
115 void operator()(ASTBreak& ast) {}
116
117 void Visit(ASTNode& node) {
118 std::visit(*this, *node->GetInnerData());
119 if (node->IsBlockEncoded()) {
120 auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
121 NodeBlock bb = ir.DecodeRange(block->start, block->end);
122 node->TransformBlockEncoded(std::move(bb));
123 }
124 }
125
126private:
127 ShaderIR& ir;
128};
129
130void ShaderIR::Decode() {
131 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
132
133 decompiled = false;
134 auto info = ScanFlow(program_code, main_offset, settings, registry);
135 auto& shader_info = *info;
136 coverage_begin = shader_info.start;
137 coverage_end = shader_info.end;
138 switch (shader_info.settings.depth) {
139 case CompileDepth::FlowStack: {
140 for (const auto& block : shader_info.blocks) {
141 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
142 }
143 break;
144 }
145 case CompileDepth::NoFlowStack: {
146 disable_flow_stack = true;
147 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
148 if (label == static_cast<u32>(exit_branch)) {
149 return;
150 }
151 basic_blocks.insert({label, nodes});
152 };
153 const auto& blocks = shader_info.blocks;
154 NodeBlock current_block;
155 u32 current_label = static_cast<u32>(exit_branch);
156 for (const auto& block : blocks) {
157 if (shader_info.labels.contains(block.start)) {
158 insert_block(current_block, current_label);
159 current_block.clear();
160 current_label = block.start;
161 }
162 if (!block.ignore_branch) {
163 DecodeRangeInner(current_block, block.start, block.end);
164 InsertControlFlow(current_block, block);
165 } else {
166 DecodeRangeInner(current_block, block.start, block.end + 1);
167 }
168 }
169 insert_block(current_block, current_label);
170 break;
171 }
172 case CompileDepth::DecompileBackwards:
173 case CompileDepth::FullDecompile: {
174 program_manager = std::move(shader_info.manager);
175 disable_flow_stack = true;
176 decompiled = true;
177 ASTDecoder decoder{*this};
178 ASTNode program = GetASTProgram();
179 decoder.Visit(program);
180 break;
181 }
182 default:
183 LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
184 [[fallthrough]];
185 case CompileDepth::BruteForce: {
186 const auto shader_end = static_cast<u32>(program_code.size());
187 coverage_begin = main_offset;
188 coverage_end = shader_end;
189 for (u32 label = main_offset; label < shader_end; ++label) {
190 basic_blocks.insert({label, DecodeRange(label, label + 1)});
191 }
192 break;
193 }
194 }
195 if (settings.depth != shader_info.settings.depth) {
196 LOG_WARNING(
197 HW_GPU, "Decompiling with setting \"{}\" failed, downgrading to setting \"{}\"",
198 CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
199 }
200}
201
202NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
203 NodeBlock basic_block;
204 DecodeRangeInner(basic_block, begin, end);
205 return basic_block;
206}
207
208void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
209 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
210 pc = DecodeInstr(bb, pc);
211 }
212}
213
214void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
215 const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
216 Node result = n;
217 if (cond.cc != ConditionCode::T) {
218 result = Conditional(GetConditionCode(cond.cc), {result});
219 }
220 if (cond.predicate != Pred::UnusedIndex) {
221 u32 pred = static_cast<u32>(cond.predicate);
222 const bool is_neg = pred > 7;
223 if (is_neg) {
224 pred -= 8;
225 }
226 result = Conditional(GetPredicate(pred, is_neg), {result});
227 }
228 return result;
229 };
230 if (std::holds_alternative<SingleBranch>(*block.branch)) {
231 auto branch = std::get_if<SingleBranch>(block.branch.get());
232 if (branch->address < 0) {
233 if (branch->kill) {
234 Node n = Operation(OperationCode::Discard);
235 n = apply_conditions(branch->condition, n);
236 bb.push_back(n);
237 global_code.push_back(n);
238 return;
239 }
240 Node n = Operation(OperationCode::Exit);
241 n = apply_conditions(branch->condition, n);
242 bb.push_back(n);
243 global_code.push_back(n);
244 return;
245 }
246 Node n = Operation(OperationCode::Branch, Immediate(branch->address));
247 n = apply_conditions(branch->condition, n);
248 bb.push_back(n);
249 global_code.push_back(n);
250 return;
251 }
252 auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
253 Node op_a = GetRegister(multi_branch->gpr);
254 for (auto& branch_case : multi_branch->branches) {
255 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
256 Node op_b = Immediate(branch_case.cmp_value);
257 Node condition =
258 GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
259 auto result = Conditional(condition, {n});
260 bb.push_back(result);
261 global_code.push_back(result);
262 }
263}
264
265u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
266 // Ignore sched instructions when generating code.
267 if (IsSchedInstruction(pc, main_offset)) {
268 return pc + 1;
269 }
270
271 const Instruction instr = {program_code[pc]};
272 const auto opcode = OpCode::Decode(instr);
273 const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
274
275 // Decoding failure
276 if (!opcode) {
277 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
278 bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
279 nv_address, instr.value)));
280 return pc + 1;
281 }
282
283 bb.push_back(Comment(
284 fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
285
286 using Tegra::Shader::Pred;
287 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
288 "NeverExecute predicate not implemented");
289
290 static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
291 {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
292 {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
293 {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
294 {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
295 {OpCode::Type::Shift, &ShaderIR::DecodeShift},
296 {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
297 {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
298 {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
299 {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
300 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
301 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
302 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
303 {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
304 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
305 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
306 {OpCode::Type::Image, &ShaderIR::DecodeImage},
307 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
308 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
309 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
310 {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
311 {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
312 {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
313 {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
314 {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
315 {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
316 {OpCode::Type::Video, &ShaderIR::DecodeVideo},
317 {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
318 };
319
320 std::vector<Node> tmp_block;
321 if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
322 pc = (this->*decoder->second)(tmp_block, pc);
323 } else {
324 pc = DecodeOther(tmp_block, pc);
325 }
326
327 // Some instructions (like SSY) don't have a predicate field; they are always executed
328 // unconditionally.
329 const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
330 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
331
332 if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
333 const Node conditional =
334 Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
335 global_code.push_back(conditional);
336 bb.push_back(conditional);
337 } else {
338 for (auto& node : tmp_block) {
339 global_code.push_back(node);
340 bb.push_back(node);
341 }
342 }
343
344 return pc + 1;
345}
346
347void ShaderIR::PostDecode() {
348 // Deduce texture handler size if needed
349 auto gpu_driver = registry.AccessGuestDriverProfile();
350 DeduceTextureHandlerSize(gpu_driver, used_samplers);
351 // Deduce Indexed Samplers
352 if (!uses_indexed_samplers) {
353 return;
354 }
355 for (auto& sampler : used_samplers) {
356 if (!sampler.is_indexed) {
357 continue;
358 }
359 if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
360 sampler.size = *size;
361 } else {
362 LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
363 sampler.size = 1;
364 }
365 }
366}
367
368} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
deleted file mode 100644
index 15eb700e7..000000000
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::SubOp;
17
18u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 Node op_a = GetRegister(instr.gpr8);
23
24 Node op_b = [&] {
25 if (instr.is_b_imm) {
26 return GetImmediate19(instr);
27 } else if (instr.is_b_gpr) {
28 return GetRegister(instr.gpr20);
29 } else {
30 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
31 }
32 }();
33
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::MOV_C:
36 case OpCode::Id::MOV_R: {
37 // MOV has neither 'abs' nor 'neg' bits.
38 SetRegister(bb, instr.gpr0, op_b);
39 break;
40 }
41 case OpCode::Id::FMUL_C:
42 case OpCode::Id::FMUL_R:
43 case OpCode::Id::FMUL_IMM: {
44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
45 if (instr.fmul.tab5cb8_2 != 0) {
46 LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
47 instr.fmul.tab5cb8_2.Value());
48 }
49 if (instr.fmul.tab5c68_0 != 1) {
50 LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented",
51 instr.fmul.tab5c68_0.Value());
52 }
53
54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
55
56 static constexpr std::array FmulPostFactor = {
57 1.000f, // None
58 0.500f, // Divide 2
59 0.250f, // Divide 4
60 0.125f, // Divide 8
61 8.000f, // Mul 8
62 4.000f, // Mul 4
63 2.000f, // Mul 2
64 };
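// Illustrative readings of the encoding above: postfactor 1 scales by 0.5, 3 scales by
// 0.125 (divide by 8) and 4 scales by 8.0; the factor is folded into op_a before the
// precise multiply below.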
65
66 if (instr.fmul.postfactor != 0) {
67 op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
68 Immediate(FmulPostFactor[instr.fmul.postfactor]));
69 }
70
71 // TODO(Rodrigo): Should precise be used when there's a postfactor?
72 Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
73
74 value = GetSaturatedFloat(value, instr.alu.saturate_d);
75
76 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
77 SetRegister(bb, instr.gpr0, value);
78 break;
79 }
80 case OpCode::Id::FADD_C:
81 case OpCode::Id::FADD_R:
82 case OpCode::Id::FADD_IMM: {
83 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
84 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
85
86 Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
87 value = GetSaturatedFloat(value, instr.alu.saturate_d);
88
89 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
90 SetRegister(bb, instr.gpr0, value);
91 break;
92 }
93 case OpCode::Id::MUFU: {
94 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
95
96 Node value = [&]() {
97 switch (instr.sub_op) {
98 case SubOp::Cos:
99 return Operation(OperationCode::FCos, PRECISE, op_a);
100 case SubOp::Sin:
101 return Operation(OperationCode::FSin, PRECISE, op_a);
102 case SubOp::Ex2:
103 return Operation(OperationCode::FExp2, PRECISE, op_a);
104 case SubOp::Lg2:
105 return Operation(OperationCode::FLog2, PRECISE, op_a);
106 case SubOp::Rcp:
107 return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
108 case SubOp::Rsq:
109 return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
110 case SubOp::Sqrt:
111 return Operation(OperationCode::FSqrt, PRECISE, op_a);
112 default:
113 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
114 return Immediate(0);
115 }
116 }();
117 value = GetSaturatedFloat(value, instr.alu.saturate_d);
118
119 SetRegister(bb, instr.gpr0, value);
120 break;
121 }
122 case OpCode::Id::FMNMX_C:
123 case OpCode::Id::FMNMX_R:
124 case OpCode::Id::FMNMX_IMM: {
125 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
126 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
127
128 const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
129
130 const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
131 const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
132 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
133
134 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
135 SetRegister(bb, instr.gpr0, value);
136 break;
137 }
138 case OpCode::Id::FCMP_RR:
139 case OpCode::Id::FCMP_RC:
140 case OpCode::Id::FCMP_IMMR: {
141 UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
142 Node op_c = GetRegister(instr.gpr39);
143 Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
144 SetRegister(
145 bb, instr.gpr0,
146 Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
147 break;
148 }
149 case OpCode::Id::RRO_C:
150 case OpCode::Id::RRO_R:
151 case OpCode::Id::RRO_IMM: {
152 LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
153
154 // Currently RRO is only implemented as a register move.
155 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
156 SetRegister(bb, instr.gpr0, op_b);
157 break;
158 }
159 default:
160 UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
161 }
162
163 return pc;
164}
165
166} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
deleted file mode 100644
index 88103fede..000000000
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::HalfType;
15using Tegra::Shader::Instruction;
16using Tegra::Shader::OpCode;
17
18u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 bool negate_a = false;
23 bool negate_b = false;
24 bool absolute_a = false;
25 bool absolute_b = false;
26
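    // The abs/neg modifier bits are read from raw instruction bit positions, which differ between the register and constant-buffer encodings.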
27 switch (opcode->get().GetId()) {
28 case OpCode::Id::HADD2_R:
29 if (instr.alu_half.ftz == 0) {
30 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
31 }
32 negate_a = ((instr.value >> 43) & 1) != 0;
33 negate_b = ((instr.value >> 31) & 1) != 0;
34 absolute_a = ((instr.value >> 44) & 1) != 0;
35 absolute_b = ((instr.value >> 30) & 1) != 0;
36 break;
37 case OpCode::Id::HADD2_C:
38 if (instr.alu_half.ftz == 0) {
39 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
40 }
41 negate_a = ((instr.value >> 43) & 1) != 0;
42 negate_b = ((instr.value >> 56) & 1) != 0;
43 absolute_a = ((instr.value >> 44) & 1) != 0;
44 absolute_b = ((instr.value >> 54) & 1) != 0;
45 break;
46 case OpCode::Id::HMUL2_R:
47 negate_a = ((instr.value >> 43) & 1) != 0;
48 absolute_a = ((instr.value >> 44) & 1) != 0;
49 absolute_b = ((instr.value >> 30) & 1) != 0;
50 break;
51 case OpCode::Id::HMUL2_C:
52 negate_b = ((instr.value >> 31) & 1) != 0;
53 absolute_a = ((instr.value >> 44) & 1) != 0;
54 absolute_b = ((instr.value >> 54) & 1) != 0;
55 break;
56 default:
57 UNREACHABLE();
58 break;
59 }
60
61 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
62 op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
63
64 auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> {
65 switch (opcode->get().GetId()) {
66 case OpCode::Id::HADD2_C:
67 case OpCode::Id::HMUL2_C:
68 return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
69 case OpCode::Id::HADD2_R:
70 case OpCode::Id::HMUL2_R:
71 return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
72 default:
73 UNREACHABLE();
74 return {HalfType::F32, Immediate(0)};
75 }
76 }();
77 op_b = UnpackHalfFloat(op_b, type_b);
78 op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
79
80 Node value = [this, opcode, op_a, op_b = op_b] {
81 switch (opcode->get().GetId()) {
82 case OpCode::Id::HADD2_C:
83 case OpCode::Id::HADD2_R:
84 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
85 case OpCode::Id::HMUL2_C:
86 case OpCode::Id::HMUL2_R:
87 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
88 default:
89 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
90 return Immediate(0);
91 }
92 }();
93 value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
94 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
95
96 SetRegister(bb, instr.gpr0, value);
97
98 return pc;
99}
100
101} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
deleted file mode 100644
index d179b9873..000000000
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16
17u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
22 if (instr.alu_half_imm.ftz == 0) {
23 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
24 }
25 } else {
26 if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
27 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
28 }
29 }
30
31 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
32 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
33
34 const Node op_b = UnpackHalfImmediate(instr, true);
35
36 Node value = [&]() {
37 switch (opcode->get().GetId()) {
38 case OpCode::Id::HADD2_IMM:
39 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
40 case OpCode::Id::HMUL2_IMM:
41 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
42 default:
43 UNREACHABLE();
44 return Immediate(0);
45 }
46 }();
47
48 value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
49 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
50 SetRegister(bb, instr.gpr0, value);
51 return pc;
52}
53
54} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
deleted file mode 100644
index f1875967c..000000000
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 switch (opcode->get().GetId()) {
21 case OpCode::Id::MOV32_IMM: {
22 SetRegister(bb, instr.gpr0, GetImmediate32(instr));
23 break;
24 }
25 case OpCode::Id::FMUL32_IMM: {
26 Node value =
27 Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
28 value = GetSaturatedFloat(value, instr.fmul32.saturate);
29
30 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
31 SetRegister(bb, instr.gpr0, value);
32 break;
33 }
34 case OpCode::Id::FADD32I: {
35 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
36 instr.fadd32i.negate_a);
37 const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
38 instr.fadd32i.negate_b);
39
40 const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
41 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
42 SetRegister(bb, instr.gpr0, value);
43 break;
44 }
45 default:
46 UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
47 opcode->get().GetName());
48 }
49
50 return pc;
51}
52
53} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
deleted file mode 100644
index 7b5bb7003..000000000
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ /dev/null
@@ -1,375 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::IAdd3Height;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::Register;
18
19u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr);
22
23 Node op_a = GetRegister(instr.gpr8);
24 Node op_b = [&]() {
25 if (instr.is_b_imm) {
26 return Immediate(instr.alu.GetSignedImm20_20());
27 } else if (instr.is_b_gpr) {
28 return GetRegister(instr.gpr20);
29 } else {
30 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
31 }
32 }();
33
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::IADD_C:
36 case OpCode::Id::IADD_R:
37 case OpCode::Id::IADD_IMM: {
38 UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
39 UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
40
41 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
42 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
43
44 Node value = Operation(OperationCode::UAdd, op_a, op_b);
45
46 if (instr.iadd.x) {
47 Node carry = GetInternalFlag(InternalFlag::Carry);
48 Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
49 value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
50 }
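        // Emulate condition code generation: zero, sign, carry, and overflow. Overflow is only derived here from the positive + positive -> negative case.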
51
52 if (instr.generates_cc) {
53 const Node i0 = Immediate(0);
54
55 Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
56 Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
57 Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
58
59 Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
60 Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
61 Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
62 Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
63
64 SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
65 SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
66 SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
67 SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
68 }
69 SetRegister(bb, instr.gpr0, std::move(value));
70 break;
71 }
72 case OpCode::Id::IADD3_C:
73 case OpCode::Id::IADD3_R:
74 case OpCode::Id::IADD3_IMM: {
75 Node op_c = GetRegister(instr.gpr39);
76
77 const auto ApplyHeight = [&](IAdd3Height height, Node value) {
78 switch (height) {
79 case IAdd3Height::None:
80 return value;
81 case IAdd3Height::LowerHalfWord:
82 return BitfieldExtract(value, 0, 16);
83 case IAdd3Height::UpperHalfWord:
84 return BitfieldExtract(value, 16, 16);
85 default:
86 UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
87 return Immediate(0);
88 }
89 };
90
91 if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
92 op_a = ApplyHeight(instr.iadd3.height_a, op_a);
93 op_b = ApplyHeight(instr.iadd3.height_b, op_b);
94 op_c = ApplyHeight(instr.iadd3.height_c, op_c);
95 }
96
97 op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
98 op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
99 op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
100
101 const Node value = [&] {
102 Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
103 if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
104 return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
105 }
106 const Node shifted = [&] {
107 switch (instr.iadd3.mode) {
108 case Tegra::Shader::IAdd3Mode::RightShift:
109                    // TODO(tech4me): According to
110                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
111                    // the addition between op_a and op_b should be done in uint33; more
112                    // investigation is required.
113 return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
114 Immediate(16));
115 case Tegra::Shader::IAdd3Mode::LeftShift:
116 return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
117 Immediate(16));
118 default:
119 return add_ab;
120 }
121 }();
122 return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
123 }();
124
125 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
126 SetRegister(bb, instr.gpr0, value);
127 break;
128 }
129 case OpCode::Id::ISCADD_C:
130 case OpCode::Id::ISCADD_R:
131 case OpCode::Id::ISCADD_IMM: {
132 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
133                          "Condition code generation in ISCADD is not implemented");
134
135 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
136 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
137
138 const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
139 const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
140 const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
141
142 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
143 SetRegister(bb, instr.gpr0, value);
144 break;
145 }
146 case OpCode::Id::POPC_C:
147 case OpCode::Id::POPC_R:
148 case OpCode::Id::POPC_IMM: {
149 if (instr.popc.invert) {
150 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
151 }
152 const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
153 SetRegister(bb, instr.gpr0, value);
154 break;
155 }
156 case OpCode::Id::FLO_R:
157 case OpCode::Id::FLO_C:
158 case OpCode::Id::FLO_IMM: {
159 Node value;
160 if (instr.flo.invert) {
161 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
162 }
163 if (instr.flo.is_signed) {
164 value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
165 } else {
166 value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
167 }
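        // When the 'sh' flag is set, flip the result to 31 - index (via XOR with 31), counting the bit position from the most significant end.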
168 if (instr.flo.sh) {
169 value =
170 Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
171 }
172 SetRegister(bb, instr.gpr0, std::move(value));
173 break;
174 }
175 case OpCode::Id::SEL_C:
176 case OpCode::Id::SEL_R:
177 case OpCode::Id::SEL_IMM: {
178 const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
179 const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
180 SetRegister(bb, instr.gpr0, value);
181 break;
182 }
183 case OpCode::Id::ICMP_CR:
184 case OpCode::Id::ICMP_R:
185 case OpCode::Id::ICMP_RC:
186 case OpCode::Id::ICMP_IMM: {
187 const Node zero = Immediate(0);
188
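        // ICMP compares the third source operand against zero and selects between the other two operands based on the result.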
189 const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
190 switch (opcode->get().GetId()) {
191 case OpCode::Id::ICMP_CR:
192 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
193 GetRegister(instr.gpr39)};
194 case OpCode::Id::ICMP_R:
195 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
196 case OpCode::Id::ICMP_RC:
197 return {GetRegister(instr.gpr39),
198 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
199 case OpCode::Id::ICMP_IMM:
200 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
201 default:
202 UNREACHABLE();
203 return {zero, zero};
204 }
205 }();
206 const Node op_lhs = GetRegister(instr.gpr8);
207 const Node comparison =
208 GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
209 SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
210 break;
211 }
212 case OpCode::Id::LOP_C:
213 case OpCode::Id::LOP_R:
214 case OpCode::Id::LOP_IMM: {
215 if (instr.alu.lop.invert_a)
216 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
217 if (instr.alu.lop.invert_b)
218 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
219
220 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
221 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
222 instr.generates_cc);
223 break;
224 }
225 case OpCode::Id::LOP3_C:
226 case OpCode::Id::LOP3_R:
227 case OpCode::Id::LOP3_IMM: {
228 const Node op_c = GetRegister(instr.gpr39);
229 const Node lut = [&]() {
230 if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
231 return Immediate(instr.alu.lop3.GetImmLut28());
232 } else {
233 return Immediate(instr.alu.lop3.GetImmLut48());
234 }
235 }();
236
237 WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
238 break;
239 }
240 case OpCode::Id::IMNMX_C:
241 case OpCode::Id::IMNMX_R:
242 case OpCode::Id::IMNMX_IMM: {
243 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
244
245 const bool is_signed = instr.imnmx.is_signed;
246
247 const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
248 const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
249 const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
250 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
251
252 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
253 SetRegister(bb, instr.gpr0, value);
254 break;
255 }
256 case OpCode::Id::LEA_R2:
257 case OpCode::Id::LEA_R1:
258 case OpCode::Id::LEA_IMM:
259 case OpCode::Id::LEA_RZ:
260 case OpCode::Id::LEA_HI: {
261 auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
262 switch (opcode->get().GetId()) {
263 case OpCode::Id::LEA_R2: {
264 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
265 Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
266 }
267 case OpCode::Id::LEA_R1: {
268 const bool neg = instr.lea.r1.neg != 0;
269 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
270 GetRegister(instr.gpr20),
271 Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
272 }
273 case OpCode::Id::LEA_IMM: {
274 const bool neg = instr.lea.imm.neg != 0;
275 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
276 Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
277 Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
278 }
279 case OpCode::Id::LEA_RZ: {
280 const bool neg = instr.lea.rz.neg != 0;
281 return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
282 GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
283 Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
284 }
285 case OpCode::Id::LEA_HI:
286 default:
287 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
288
289 return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
290 Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
291 }
292 }();
293
294 UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
295 "Unhandled LEA Predicate");
296
297 Node value =
298 Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
299 value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
300 SetRegister(bb, instr.gpr0, std::move(value));
301
302 break;
303 }
304 default:
305 UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
306 }
307
308 return pc;
309}
310
311void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
312 Node imm_lut, bool sets_cc) {
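    // The 8-bit immediate LUT encodes an arbitrary three-input boolean function: bit ((a << 2) | (b << 1) | c) decides whether that input combination yields 1, so the result is the OR of all selected minterms.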
313 const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
314 Node value = Immediate(0);
315 const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
316 if (imm.GetValue() & 0x01) {
317 const Node a = Operation(OperationCode::IBitwiseNot, na);
318 const Node b = Operation(OperationCode::IBitwiseNot, nb);
319 const Node c = Operation(OperationCode::IBitwiseNot, nc);
320 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
321 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
322 value = Operation(OperationCode::IBitwiseOr, value, r);
323 }
324 if (imm.GetValue() & 0x02) {
325 const Node a = Operation(OperationCode::IBitwiseNot, na);
326 const Node b = Operation(OperationCode::IBitwiseNot, nb);
327 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
328 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
329 value = Operation(OperationCode::IBitwiseOr, value, r);
330 }
331 if (imm.GetValue() & 0x04) {
332 const Node a = Operation(OperationCode::IBitwiseNot, na);
333 const Node c = Operation(OperationCode::IBitwiseNot, nc);
334 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
335 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
336 value = Operation(OperationCode::IBitwiseOr, value, r);
337 }
338 if (imm.GetValue() & 0x08) {
339 const Node a = Operation(OperationCode::IBitwiseNot, na);
340 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
341 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
342 value = Operation(OperationCode::IBitwiseOr, value, r);
343 }
344 if (imm.GetValue() & 0x10) {
345 const Node b = Operation(OperationCode::IBitwiseNot, nb);
346 const Node c = Operation(OperationCode::IBitwiseNot, nc);
347 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
348 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
349 value = Operation(OperationCode::IBitwiseOr, value, r);
350 }
351 if (imm.GetValue() & 0x20) {
352 const Node b = Operation(OperationCode::IBitwiseNot, nb);
353 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
354 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
355 value = Operation(OperationCode::IBitwiseOr, value, r);
356 }
357 if (imm.GetValue() & 0x40) {
358 const Node c = Operation(OperationCode::IBitwiseNot, nc);
359 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
360 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
361 value = Operation(OperationCode::IBitwiseOr, value, r);
362 }
363 if (imm.GetValue() & 0x80) {
364 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
365 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
366 value = Operation(OperationCode::IBitwiseOr, value, r);
367 }
368 return value;
369 }(op_a, op_b, op_c, imm_lut);
370
371 SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
372 SetRegister(bb, dest, lop3_fast);
373}
374
375} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
deleted file mode 100644
index 73580277a..000000000
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::LogicOperation;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::PredicateResultMode;
18using Tegra::Shader::Register;
19
20u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
21 const Instruction instr = {program_code[pc]};
22 const auto opcode = OpCode::Decode(instr);
23
24 Node op_a = GetRegister(instr.gpr8);
25 Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
26
27 switch (opcode->get().GetId()) {
28 case OpCode::Id::IADD32I: {
29 UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
30
31 op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
32
33 Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
34
35 SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
36 SetRegister(bb, instr.gpr0, std::move(value));
37 break;
38 }
39 case OpCode::Id::LOP32I: {
40 if (instr.alu.lop32i.invert_a) {
41 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
42 }
43
44 if (instr.alu.lop32i.invert_b) {
45 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
46 }
47
48 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
49 std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
50 instr.op_32.generates_cc != 0);
51 break;
52 }
53 default:
54 UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
55 opcode->get().GetName());
56 }
57
58 return pc;
59}
60
61void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
62 Node op_b, PredicateResultMode predicate_mode, Pred predicate,
63 bool sets_cc) {
64 Node result = [&] {
65 switch (logic_op) {
66 case LogicOperation::And:
67 return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
68 case LogicOperation::Or:
69 return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
70 case LogicOperation::Xor:
71 return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
72 case LogicOperation::PassB:
73 return op_b;
74 default:
75 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
76 return Immediate(0);
77 }
78 }();
79
80 SetInternalFlagsFromInteger(bb, result, sets_cc);
81 SetRegister(bb, dest, result);
82
83 // Write the predicate value depending on the predicate mode.
84 switch (predicate_mode) {
85 case PredicateResultMode::None:
86 // Do nothing.
87 return;
88 case PredicateResultMode::NotZero: {
89 // Set the predicate to true if the result is not zero.
90 Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
91 SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
92 break;
93 }
94 default:
95 UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
96 }
97}
98
99} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
deleted file mode 100644
index 8e3b46e8e..000000000
--- a/src/video_core/shader/decode/bfe.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 Node op_a = GetRegister(instr.gpr8);
21 Node op_b = [&] {
22 switch (opcode->get().GetId()) {
23 case OpCode::Id::BFE_R:
24 return GetRegister(instr.gpr20);
25 case OpCode::Id::BFE_C:
26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 case OpCode::Id::BFE_IMM:
28 return Immediate(instr.alu.GetSignedImm20_20());
29 default:
30 UNREACHABLE();
31 return Immediate(0);
32 }
33 }();
34
35    UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE are not implemented");
36
37 const bool is_signed = instr.bfe.is_signed;
38
39    // Bits are reversed using the reverse parallel method described in
40    // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
41    // TODO: switch to a faster method if one becomes feasible.
42 if (instr.bfe.brev) {
43 const auto swap = [&](u32 s, u32 mask) {
44 Node v1 =
45 SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
46 if (mask != 0) {
47 v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
48 Immediate(mask));
49 }
50 Node v2 = op_a;
51 if (mask != 0) {
52 v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
53 Immediate(mask));
54 }
55 v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
56 Immediate(s));
57 return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
58 std::move(v2));
59 };
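        // Reverse the word by swapping single bits, then bit pairs, nibbles, bytes, and finally the two 16-bit halves.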
60 op_a = swap(1, 0x55555555U);
61 op_a = swap(2, 0x33333333U);
62 op_a = swap(4, 0x0F0F0F0FU);
63 op_a = swap(8, 0x00FF00FFU);
64 op_a = swap(16, 0);
65 }
66
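    // op_b packs the extraction offset in its low byte and the field width in the next byte.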
67 const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
68 Immediate(0), Immediate(8));
69 const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
70 Immediate(8), Immediate(8));
71 auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
72 SetRegister(bb, instr.gpr0, std::move(result));
73
74 return pc;
75}
76
77} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
deleted file mode 100644
index 70d1c055b..000000000
--- a/src/video_core/shader/decode/bfi.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> {
21 switch (opcode->get().GetId()) {
22 case OpCode::Id::BFI_RC:
23 return {GetRegister(instr.gpr39),
24 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
25 case OpCode::Id::BFI_IMM_R:
26 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
27 default:
28 UNREACHABLE();
29 return {Immediate(0), Immediate(0)};
30 }
31 }();
32 const Node insert = GetRegister(instr.gpr8);
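    // The packed shift operand holds the insertion offset in bits [0, 8) and the field width in bits [8, 16).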
33 const Node offset = BitfieldExtract(packed_shift, 0, 8);
34 const Node bits = BitfieldExtract(packed_shift, 8, 8);
35
36 const Node value =
37 Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
38
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 SetRegister(bb, instr.gpr0, value);
41
42 return pc;
43}
44
45} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
deleted file mode 100644
index fea7a54df..000000000
--- a/src/video_core/shader/decode/conversion.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6#include <optional>
7#include <utility>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/node_helper.h"
13#include "video_core/shader/shader_ir.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register;
20
21namespace {
22
23constexpr OperationCode GetFloatSelector(u64 selector) {
24 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
25}
26
27constexpr u32 SizeInBits(Register::Size size) {
28 switch (size) {
29 case Register::Size::Byte:
30 return 8;
31 case Register::Size::Short:
32 return 16;
33 case Register::Size::Word:
34 return 32;
35 case Register::Size::Long:
36 return 64;
37 }
38 return 0;
39}
40
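// Returns the [min, max] clamp range for a saturating integer conversion, or std::nullopt when no
// clamping is needed (word-to-word conversions with matching signedness).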
41constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
42 Register::Size dst_size,
43 bool src_signed,
44 bool dst_signed) {
45 const u32 dst_bits = SizeInBits(dst_size);
46 if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
47 if (src_signed == dst_signed) {
48 return std::nullopt;
49 }
50 return std::make_pair(0, std::numeric_limits<s32>::max());
51 }
52 if (dst_signed) {
53        // Signed destination: clamp to the signed range (e.g. [-128, 127] for a byte)
54 return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
55 } else {
56 // Unsigned destination
57 if (dst_bits == 32) {
58            // Avoid shifting by 32, which is undefined behavior
59 return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
60 }
61 return std::make_pair(0, (1 << dst_bits) - 1);
62 }
63}
64
65} // Anonymous namespace
66
67u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
68 const Instruction instr = {program_code[pc]};
69 const auto opcode = OpCode::Decode(instr);
70
71 switch (opcode->get().GetId()) {
72 case OpCode::Id::I2I_R:
73 case OpCode::Id::I2I_C:
74 case OpCode::Id::I2I_IMM: {
75 const bool src_signed = instr.conversion.is_input_signed;
76 const bool dst_signed = instr.conversion.is_output_signed;
77 const Register::Size src_size = instr.conversion.src_size;
78 const Register::Size dst_size = instr.conversion.dst_size;
79 const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
80
81 Node value = [this, instr, opcode] {
82 switch (opcode->get().GetId()) {
83 case OpCode::Id::I2I_R:
84 return GetRegister(instr.gpr20);
85 case OpCode::Id::I2I_C:
86 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
87 case OpCode::Id::I2I_IMM:
88 return Immediate(instr.alu.GetSignedImm20_20());
89 default:
90 UNREACHABLE();
91 return Immediate(0);
92 }
93 }();
94
95 // Ensure the source selector is valid
96 switch (instr.conversion.src_size) {
97 case Register::Size::Byte:
98 break;
99 case Register::Size::Short:
100 ASSERT(selector == 0 || selector == 2);
101 break;
102 default:
103 ASSERT(selector == 0);
104 break;
105 }
106
107 if (src_size != Register::Size::Word || selector != 0) {
108 value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
109 Immediate(selector * 8), Immediate(SizeInBits(src_size)));
110 }
111
112 value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
113 instr.conversion.negate_a, src_signed);
114
115 if (instr.alu.saturate_d) {
116 if (src_signed && !dst_signed) {
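                // Signed source, unsigned destination: negative inputs (sign bit set) clamp to zero, then the value is clamped to the destination type's maximum.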
117 Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
118 Immediate(1 << (SizeInBits(src_size) - 1)));
119 value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
120 std::move(value));
121
122                // Simplify generated expressions; this can be removed without semantic impact
123 SetTemporary(bb, 0, std::move(value));
124 value = GetTemporary(0);
125
126 if (dst_size != Register::Size::Word) {
127 const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
128 Node is_large =
129 Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
130 value = Operation(OperationCode::Select, std::move(is_large), limit,
131 std::move(value));
132 }
133 } else if (const std::optional bounds =
134 IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
135 value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
136 Immediate(bounds->first));
137 value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
138 Immediate(bounds->second));
139 }
140 } else if (dst_size != Register::Size::Word) {
141            // No saturation; we only have to mask the result
142 Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
143 value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
144 }
145
146 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
147 SetRegister(bb, instr.gpr0, std::move(value));
148 break;
149 }
150 case OpCode::Id::I2F_R:
151 case OpCode::Id::I2F_C:
152 case OpCode::Id::I2F_IMM: {
153 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
154 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
155                          "Condition code generation in I2F is not implemented");
156
157 Node value = [&] {
158 switch (opcode->get().GetId()) {
159 case OpCode::Id::I2F_R:
160 return GetRegister(instr.gpr20);
161 case OpCode::Id::I2F_C:
162 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
163 case OpCode::Id::I2F_IMM:
164 return Immediate(instr.alu.GetSignedImm20_20());
165 default:
166 UNREACHABLE();
167 return Immediate(0);
168 }
169 }();
170
171 const bool input_signed = instr.conversion.is_input_signed;
172
173 if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
174 ASSERT(instr.conversion.src_size == Register::Size::Byte ||
175 instr.conversion.src_size == Register::Size::Short);
176 if (instr.conversion.src_size == Register::Size::Short) {
177 ASSERT(offset == 0 || offset == 2);
178 }
179 value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
180 std::move(value), Immediate(offset * 8));
181 }
182
183 value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
184 value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
185 value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
186 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
187
188 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
189
190 if (instr.conversion.dst_size == Register::Size::Short) {
191 value = Operation(OperationCode::HCastFloat, PRECISE, value);
192 }
193
194 SetRegister(bb, instr.gpr0, value);
195 break;
196 }
197 case OpCode::Id::F2F_R:
198 case OpCode::Id::F2F_C:
199 case OpCode::Id::F2F_IMM: {
200 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
201 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
202 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
203                          "Condition code generation in F2F is not implemented");
204
205 Node value = [&]() {
206 switch (opcode->get().GetId()) {
207 case OpCode::Id::F2F_R:
208 return GetRegister(instr.gpr20);
209 case OpCode::Id::F2F_C:
210 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
211 case OpCode::Id::F2F_IMM:
212 return GetImmediate19(instr);
213 default:
214 UNREACHABLE();
215 return Immediate(0);
216 }
217 }();
218
219 if (instr.conversion.src_size == Register::Size::Short) {
220 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
221 std::move(value));
222 } else {
223 ASSERT(instr.conversion.float_src.selector == 0);
224 }
225
226 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
227
228 value = [&] {
229 if (instr.conversion.src_size != instr.conversion.dst_size) {
230                // Rounding operations only matter when the source and destination conversion sizes
231                // are the same.
232 return value;
233 }
234 switch (instr.conversion.f2f.GetRoundingMode()) {
235 case Tegra::Shader::F2fRoundingOp::None:
236 return value;
237 case Tegra::Shader::F2fRoundingOp::Round:
238 return Operation(OperationCode::FRoundEven, value);
239 case Tegra::Shader::F2fRoundingOp::Floor:
240 return Operation(OperationCode::FFloor, value);
241 case Tegra::Shader::F2fRoundingOp::Ceil:
242 return Operation(OperationCode::FCeil, value);
243 case Tegra::Shader::F2fRoundingOp::Trunc:
244 return Operation(OperationCode::FTrunc, value);
245 default:
246 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
247 instr.conversion.f2f.rounding.Value());
248 return value;
249 }
250 }();
251 value = GetSaturatedFloat(value, instr.alu.saturate_d);
252
253 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
254
255 if (instr.conversion.dst_size == Register::Size::Short) {
256 value = Operation(OperationCode::HCastFloat, PRECISE, value);
257 }
258
259 SetRegister(bb, instr.gpr0, value);
260 break;
261 }
262 case OpCode::Id::F2I_R:
263 case OpCode::Id::F2I_C:
264 case OpCode::Id::F2I_IMM: {
265 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
266 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
267                          "Condition code generation in F2I is not implemented");
268 Node value = [&]() {
269 switch (opcode->get().GetId()) {
270 case OpCode::Id::F2I_R:
271 return GetRegister(instr.gpr20);
272 case OpCode::Id::F2I_C:
273 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
274 case OpCode::Id::F2I_IMM:
275 return GetImmediate19(instr);
276 default:
277 UNREACHABLE();
278 return Immediate(0);
279 }
280 }();
281
282 if (instr.conversion.src_size == Register::Size::Short) {
283 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
284 std::move(value));
285 } else {
286 ASSERT(instr.conversion.float_src.selector == 0);
287 }
288
289 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
290
291 value = [&]() {
292 switch (instr.conversion.f2i.rounding) {
293 case Tegra::Shader::F2iRoundingOp::RoundEven:
294 return Operation(OperationCode::FRoundEven, PRECISE, value);
295 case Tegra::Shader::F2iRoundingOp::Floor:
296 return Operation(OperationCode::FFloor, PRECISE, value);
297 case Tegra::Shader::F2iRoundingOp::Ceil:
298 return Operation(OperationCode::FCeil, PRECISE, value);
299 case Tegra::Shader::F2iRoundingOp::Trunc:
300 return Operation(OperationCode::FTrunc, PRECISE, value);
301 default:
302 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
303 instr.conversion.f2i.rounding.Value());
304 return Immediate(0);
305 }
306 }();
307 const bool is_signed = instr.conversion.is_output_signed;
308 value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
309 value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
310
311 SetRegister(bb, instr.gpr0, value);
312 break;
313 }
314 default:
315 UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
316 }
317
318 return pc;
319}
320
321} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
deleted file mode 100644
index 5973588d6..000000000
--- a/src/video_core/shader/decode/ffma.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
21 if (instr.ffma.tab5980_0 != 1) {
22 LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
23 }
24 if (instr.ffma.tab5980_1 != 0) {
25 LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
26 }
27
28 const Node op_a = GetRegister(instr.gpr8);
29
30 auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
31 switch (opcode->get().GetId()) {
32 case OpCode::Id::FFMA_CR: {
33 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
34 GetRegister(instr.gpr39)};
35 }
36 case OpCode::Id::FFMA_RR:
37 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
38 case OpCode::Id::FFMA_RC: {
39 return {GetRegister(instr.gpr39),
40 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
41 }
42 case OpCode::Id::FFMA_IMM:
43 return {GetImmediate19(instr), GetRegister(instr.gpr39)};
44 default:
45 UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
46 return {Immediate(0), Immediate(0)};
47 }
48 }();
49
50 op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
51 op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
52
53 Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
54 value = GetSaturatedFloat(value, instr.alu.saturate_d);
55
56 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
57 SetRegister(bb, instr.gpr0, value);
58
59 return pc;
60}
61
62} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
deleted file mode 100644
index 5614e8a0d..000000000
--- a/src/video_core/shader/decode/float_set.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18
19 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
20 instr.fset.neg_a != 0);
21
22 Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
33
34 // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
35 // condition is true, and to 0 otherwise.
36 const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
37
38 const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
39 const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
40
41 const Node predicate = Operation(combiner, first_pred, second_pred);
42
43 const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
44 const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
45 const Node value =
46 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
47
48 if (instr.fset.bf) {
49 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
50 } else {
51 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
52 }
53 SetRegister(bb, instr.gpr0, value);
54
55 return pc;
56}
57
58} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
deleted file mode 100644
index 200c2c983..000000000
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19
20 Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
21 instr.fsetp.neg_a != 0);
22 Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31 op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
32
33 // We can't use the constant predicate as destination.
34 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
35
36 const Node predicate =
37 GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
38 const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
39
40 const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
41 const Node value = Operation(combiner, predicate, second_pred);
42
43 // Set the primary predicate to the result of Predicate OP SecondPredicate
44 SetPredicate(bb, instr.fsetp.pred3, value);
45
46 if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
47 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
48 // if enabled
49 const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
50 const Node second_value = Operation(combiner, negated_pred, second_pred);
51 SetPredicate(bb, instr.fsetp.pred0, second_value);
52 }
53
54 return pc;
55}
56
57} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
deleted file mode 100644
index fa83108cd..000000000
--- a/src/video_core/shader/decode/half_set.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "common/logging/log.h"
10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/shader/node_helper.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using std::move;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::PredCondition;
20
21u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
22 const Instruction instr = {program_code[pc]};
23 const auto opcode = OpCode::Decode(instr);
24
25 PredCondition cond{};
26 bool bf = false;
27 bool ftz = false;
28 bool neg_a = false;
29 bool abs_a = false;
30 bool neg_b = false;
31 bool abs_b = false;
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSET2_C:
34 case OpCode::Id::HSET2_IMM:
35 cond = instr.hsetp2.cbuf_and_imm.cond;
36 bf = instr.Bit(53);
37 ftz = instr.Bit(54);
38 neg_a = instr.Bit(43);
39 abs_a = instr.Bit(44);
40 neg_b = instr.Bit(56);
41 abs_b = instr.Bit(54);
42 break;
43 case OpCode::Id::HSET2_R:
44 cond = instr.hsetp2.reg.cond;
45 bf = instr.Bit(49);
46 ftz = instr.Bit(50);
47 neg_a = instr.Bit(43);
48 abs_a = instr.Bit(44);
49 neg_b = instr.Bit(31);
50 abs_b = instr.Bit(30);
51 break;
52 default:
53 UNREACHABLE();
54 }
55
56 Node op_b = [this, instr, opcode] {
57 switch (opcode->get().GetId()) {
58 case OpCode::Id::HSET2_C:
59            // Report as unimplemented since this path is untested.
60 UNIMPLEMENTED_MSG("HSET2_C is not implemented");
61 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
62 case OpCode::Id::HSET2_R:
63 return GetRegister(instr.gpr20);
64 case OpCode::Id::HSET2_IMM:
65 return UnpackHalfImmediate(instr, true);
66 default:
67 UNREACHABLE();
68 return Node{};
69 }
70 }();
71
72 if (!ftz) {
73 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
74 }
75
76 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
77 op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
78
79 switch (opcode->get().GetId()) {
80 case OpCode::Id::HSET2_R:
81 op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
82 [[fallthrough]];
83 case OpCode::Id::HSET2_C:
84 op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
85 break;
86 default:
87 break;
88 }
89
90 Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
91
92 Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
93
94 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
95
96 // HSET2 operates on each half float in the pack.
97 std::array<Node, 2> values;
98 for (u32 i = 0; i < 2; ++i) {
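        // 0x3c00 is 1.0 in half precision; 0xffff is the all-ones integer 'true' value used when bf is not set.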
99 const u32 raw_value = bf ? 0x3c00 : 0xffff;
100 Node true_value = Immediate(raw_value << (i * 16));
101 Node false_value = Immediate(0);
102
103 Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
104 Node predicate = Operation(combiner, comparison, second_pred);
105 values[i] =
106 Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
107 }
108
109 Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
110 SetRegister(bb, instr.gpr0, move(value));
111
112 return pc;
113}
114
115} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
deleted file mode 100644
index 310655619..000000000
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17
18u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 if (instr.hsetp2.ftz != 0) {
23 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
24 }
25
26 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
27 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
28
29 Tegra::Shader::PredCondition cond{};
30 bool h_and{};
31 Node op_b{};
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSETP2_C:
34 cond = instr.hsetp2.cbuf_and_imm.cond;
35 h_and = instr.hsetp2.cbuf_and_imm.h_and;
36 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
37 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
38 // F32 is hardcoded in hardware
39 op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
40 break;
41 case OpCode::Id::HSETP2_IMM:
42 cond = instr.hsetp2.cbuf_and_imm.cond;
43 h_and = instr.hsetp2.cbuf_and_imm.h_and;
44 op_b = UnpackHalfImmediate(instr, true);
45 break;
46 case OpCode::Id::HSETP2_R:
47 cond = instr.hsetp2.reg.cond;
48 h_and = instr.hsetp2.reg.h_and;
49 op_b =
50 GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
51 instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
52 break;
53 default:
54 UNREACHABLE();
55 op_b = Immediate(0);
56 }
57
58 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
59 const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
60
61 const auto Write = [&](u64 dest, Node src) {
62 SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
63 };
64
65 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
66 const u64 first = instr.hsetp2.pred3;
67 const u64 second = instr.hsetp2.pred0;
68 if (h_and) {
69 Node joined = Operation(OperationCode::LogicalAnd2, comparison);
70 Write(first, joined);
71 Write(second, Operation(OperationCode::LogicalNegate, std::move(joined)));
72 } else {
73 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
74 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
75 }
76
77 return pc;
78}
79
80} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
deleted file mode 100644
index 5b44cb79c..000000000
--- a/src/video_core/shader/decode/hfma2.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/node_helper.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace VideoCommon::Shader {
14
15using Tegra::Shader::HalfPrecision;
16using Tegra::Shader::HalfType;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19
20u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
21 const Instruction instr = {program_code[pc]};
22 const auto opcode = OpCode::Decode(instr);
23
24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
25 DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
26 } else {
27 DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
28 }
29
30 constexpr auto identity = HalfType::H0_H1;
31 bool neg_b{}, neg_c{};
32 auto [saturate, type_b, op_b, type_c,
33 op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::HFMA2_CR:
36 neg_b = instr.hfma2.negate_b;
37 neg_c = instr.hfma2.negate_c;
38 return {instr.hfma2.saturate, HalfType::F32,
39 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
40 instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
41 case OpCode::Id::HFMA2_RC:
42 neg_b = instr.hfma2.negate_b;
43 neg_c = instr.hfma2.negate_c;
44 return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
45 HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
46 case OpCode::Id::HFMA2_RR:
47 neg_b = instr.hfma2.rr.negate_b;
48 neg_c = instr.hfma2.rr.negate_c;
49 return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
50 instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
51 case OpCode::Id::HFMA2_IMM_R:
52 neg_c = instr.hfma2.negate_c;
53 return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
54 instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
55 default:
56 return {false, identity, Immediate(0), identity, Immediate(0)};
57 }
58 }();
59
60 const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
61 op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
62 op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
63
64 Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
65 value = GetSaturatedHalfFloat(value, saturate);
66 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
67
68 SetRegister(bb, instr.gpr0, value);
69
70 return pc;
71}
72
73} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
deleted file mode 100644
index 5470e8cf4..000000000
--- a/src/video_core/shader/decode/image.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/textures/texture.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Instruction;
21using Tegra::Shader::OpCode;
22using Tegra::Shader::PredCondition;
23using Tegra::Shader::StoreType;
24using Tegra::Texture::ComponentType;
25using Tegra::Texture::TextureFormat;
26using Tegra::Texture::TICEntry;
27
28namespace {
29
30ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
31 std::size_t component) {
32 const TextureFormat format{descriptor.format};
33 switch (format) {
34 case TextureFormat::R16G16B16A16:
35 case TextureFormat::R32G32B32A32:
36 case TextureFormat::R32G32B32:
37 case TextureFormat::R32G32:
38 case TextureFormat::R16G16:
39 case TextureFormat::R32:
40 case TextureFormat::R16:
41 case TextureFormat::R8:
42 case TextureFormat::R1:
43 if (component == 0) {
44 return descriptor.r_type;
45 }
46 if (component == 1) {
47 return descriptor.g_type;
48 }
49 if (component == 2) {
50 return descriptor.b_type;
51 }
52 if (component == 3) {
53 return descriptor.a_type;
54 }
55 break;
56 case TextureFormat::A8R8G8B8:
57 if (component == 0) {
58 return descriptor.a_type;
59 }
60 if (component == 1) {
61 return descriptor.r_type;
62 }
63 if (component == 2) {
64 return descriptor.g_type;
65 }
66 if (component == 3) {
67 return descriptor.b_type;
68 }
69 break;
70 case TextureFormat::A2B10G10R10:
71 case TextureFormat::A4B4G4R4:
72 case TextureFormat::A5B5G5R1:
73 case TextureFormat::A1B5G5R5:
74 if (component == 0) {
75 return descriptor.a_type;
76 }
77 if (component == 1) {
78 return descriptor.b_type;
79 }
80 if (component == 2) {
81 return descriptor.g_type;
82 }
83 if (component == 3) {
84 return descriptor.r_type;
85 }
86 break;
87 case TextureFormat::R32_B24G8:
88 if (component == 0) {
89 return descriptor.r_type;
90 }
91 if (component == 1) {
92 return descriptor.b_type;
93 }
94 if (component == 2) {
95 return descriptor.g_type;
96 }
97 break;
98 case TextureFormat::B5G6R5:
99 case TextureFormat::B6G5R5:
100 case TextureFormat::B10G11R11:
101 if (component == 0) {
102 return descriptor.b_type;
103 }
104 if (component == 1) {
105 return descriptor.g_type;
106 }
107 if (component == 2) {
108 return descriptor.r_type;
109 }
110 break;
111 case TextureFormat::R24G8:
112 case TextureFormat::R8G24:
113 case TextureFormat::R8G8:
114 case TextureFormat::G4R4:
115 if (component == 0) {
116 return descriptor.g_type;
117 }
118 if (component == 1) {
119 return descriptor.r_type;
120 }
121 break;
122 default:
123 break;
124 }
125 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
126 return ComponentType::FLOAT;
127}
128
129bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
130 constexpr u8 R = 0b0001;
131 constexpr u8 G = 0b0010;
132 constexpr u8 B = 0b0100;
133 constexpr u8 A = 0b1000;
134 constexpr std::array<u8, 16> mask = {
135 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B),
136 (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
137 return std::bitset<4>{mask.at(component_mask)}.test(component);
138}
139
140u32 GetComponentSize(TextureFormat format, std::size_t component) {
141 switch (format) {
142 case TextureFormat::R32G32B32A32:
143 return 32;
144 case TextureFormat::R16G16B16A16:
145 return 16;
146 case TextureFormat::R32G32B32:
147 return component <= 2 ? 32 : 0;
148 case TextureFormat::R32G32:
149 return component <= 1 ? 32 : 0;
150 case TextureFormat::R16G16:
151 return component <= 1 ? 16 : 0;
152 case TextureFormat::R32:
153 return component == 0 ? 32 : 0;
154 case TextureFormat::R16:
155 return component == 0 ? 16 : 0;
156 case TextureFormat::R8:
157 return component == 0 ? 8 : 0;
158 case TextureFormat::R1:
159 return component == 0 ? 1 : 0;
160 case TextureFormat::A8R8G8B8:
161 return 8;
162 case TextureFormat::A2B10G10R10:
163 return (component == 3 || component == 2 || component == 1) ? 10 : 2;
164 case TextureFormat::A4B4G4R4:
165 return 4;
166 case TextureFormat::A5B5G5R1:
167 return (component == 0 || component == 1 || component == 2) ? 5 : 1;
168 case TextureFormat::A1B5G5R5:
169 return (component == 1 || component == 2 || component == 3) ? 5 : 1;
170 case TextureFormat::R32_B24G8:
171 if (component == 0) {
172 return 32;
173 }
174 if (component == 1) {
175 return 24;
176 }
177 if (component == 2) {
178 return 8;
179 }
180 return 0;
181 case TextureFormat::B5G6R5:
182 if (component == 0 || component == 2) {
183 return 5;
184 }
185 if (component == 1) {
186 return 6;
187 }
188 return 0;
189 case TextureFormat::B6G5R5:
190 if (component == 1 || component == 2) {
191 return 5;
192 }
193 if (component == 0) {
194 return 6;
195 }
196 return 0;
197 case TextureFormat::B10G11R11:
198 if (component == 1 || component == 2) {
199 return 11;
200 }
201 if (component == 0) {
202 return 10;
203 }
204 return 0;
205 case TextureFormat::R24G8:
206 if (component == 0) {
207 return 8;
208 }
209 if (component == 1) {
210 return 24;
211 }
212 return 0;
213 case TextureFormat::R8G24:
214 if (component == 0) {
215 return 24;
216 }
217 if (component == 1) {
218 return 8;
219 }
220 return 0;
221 case TextureFormat::R8G8:
222 return (component == 0 || component == 1) ? 8 : 0;
223 case TextureFormat::G4R4:
224 return (component == 0 || component == 1) ? 4 : 0;
225 default:
226 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
227 return 0;
228 }
229}
230
231std::size_t GetImageComponentMask(TextureFormat format) {
232 constexpr u8 R = 0b0001;
233 constexpr u8 G = 0b0010;
234 constexpr u8 B = 0b0100;
235 constexpr u8 A = 0b1000;
236 switch (format) {
237 case TextureFormat::R32G32B32A32:
238 case TextureFormat::R16G16B16A16:
239 case TextureFormat::A8R8G8B8:
240 case TextureFormat::A2B10G10R10:
241 case TextureFormat::A4B4G4R4:
242 case TextureFormat::A5B5G5R1:
243 case TextureFormat::A1B5G5R5:
244 return std::size_t{R | G | B | A};
245 case TextureFormat::R32G32B32:
246 case TextureFormat::R32_B24G8:
247 case TextureFormat::B5G6R5:
248 case TextureFormat::B6G5R5:
249 case TextureFormat::B10G11R11:
250 return std::size_t{R | G | B};
251 case TextureFormat::R32G32:
252 case TextureFormat::R16G16:
253 case TextureFormat::R24G8:
254 case TextureFormat::R8G24:
255 case TextureFormat::R8G8:
256 case TextureFormat::G4R4:
257 return std::size_t{R | G};
258 case TextureFormat::R32:
259 case TextureFormat::R16:
260 case TextureFormat::R8:
261 case TextureFormat::R1:
262 return std::size_t{R};
263 default:
264 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
265 return std::size_t{R | G | B | A};
266 }
267}
268
269std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
270 switch (image_type) {
271 case Tegra::Shader::ImageType::Texture1D:
272 case Tegra::Shader::ImageType::TextureBuffer:
273 return 1;
274 case Tegra::Shader::ImageType::Texture1DArray:
275 case Tegra::Shader::ImageType::Texture2D:
276 return 2;
277 case Tegra::Shader::ImageType::Texture2DArray:
278 case Tegra::Shader::ImageType::Texture3D:
279 return 3;
280 }
281 UNREACHABLE();
282 return 1;
283}
284} // Anonymous namespace
285
286std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
287 Node original_value) {
288 switch (component_type) {
289 case ComponentType::SNORM: {
290 // range [-1.0, 1.0]
291 auto cnv_value = Operation(OperationCode::FMul, original_value,
292 Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
293 cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
294 return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
295 }
296 case ComponentType::SINT:
297 case ComponentType::UNORM: {
298 bool is_signed = component_type == ComponentType::SINT;
299 // range [0.0, 1.0]
300 auto cnv_value = Operation(OperationCode::FMul, original_value,
301 Immediate(static_cast<float>(1 << component_size) - 1.f));
302 return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
303 is_signed};
304 }
305 case ComponentType::UINT: // range [0, (1 << component_size) - 1]
306 return {std::move(original_value), false};
307 case ComponentType::FLOAT:
308 if (component_size == 16) {
309 return {Operation(OperationCode::HCastFloat, original_value), true};
310 } else {
311 return {std::move(original_value), true};
312 }
313 default:
314 UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
315 return {std::move(original_value), true};
316 }
317}
318
319u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
320 const Instruction instr = {program_code[pc]};
321 const auto opcode = OpCode::Decode(instr);
322
323 const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
324 std::vector<Node> coords;
325 const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
326 coords.reserve(num_coords);
327 for (std::size_t i = 0; i < num_coords; ++i) {
328 coords.push_back(GetRegister(instr.gpr8.Value() + i));
329 }
330 return coords;
331 };
332
333 switch (opcode->get().GetId()) {
334 case OpCode::Id::SULD: {
335 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
336 Tegra::Shader::OutOfBoundsStore::Ignore);
337
338 const auto type{instr.suldst.image_type};
339 auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
340 : GetBindlessImage(instr.gpr39, type)};
341 image.MarkRead();
342
343 if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
344 u32 indexer = 0;
345 for (u32 element = 0; element < 4; ++element) {
346 if (!instr.suldst.IsComponentEnabled(element)) {
347 continue;
348 }
349 MetaImage meta{image, {}, element};
350 Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
351 SetTemporary(bb, indexer++, std::move(value));
352 }
353 for (u32 i = 0; i < indexer; ++i) {
354 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
355 }
356 } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
357 UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
358 instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
359
360 auto descriptor = [this, instr] {
361 std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
362 if (instr.suldst.is_immediate) {
363 sampler_descriptor =
364 registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
365 } else {
366 const Node image_register = GetRegister(instr.gpr39);
367 const auto result = TrackCbuf(image_register, global_code,
368 static_cast<s64>(global_code.size()));
369 const auto buffer = std::get<1>(result);
370 const auto offset = std::get<2>(result);
371 sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
372 }
373 if (!sampler_descriptor) {
374 UNREACHABLE_MSG("Failed to obtain image descriptor");
375 }
376 return *sampler_descriptor;
377 }();
378
379 const auto comp_mask = GetImageComponentMask(descriptor.format);
380
381 switch (instr.suldst.GetStoreDataLayout()) {
382 case StoreType::Bits32:
383 case StoreType::Bits64: {
384 u32 indexer = 0;
385 u32 shifted_counter = 0;
386 Node value = Immediate(0);
387 for (u32 element = 0; element < 4; ++element) {
388 if (!IsComponentEnabled(comp_mask, element)) {
389 continue;
390 }
391 const auto component_type = GetComponentType(descriptor, element);
392 const auto component_size = GetComponentSize(descriptor.format, element);
393 MetaImage meta{image, {}, element};
394
395 auto [converted_value, is_signed] = GetComponentValue(
396 component_type, component_size,
397 Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
398
399 // shift element to correct position
400 const auto shifted = shifted_counter;
401 if (shifted > 0) {
402 converted_value =
403 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
404 std::move(converted_value), Immediate(shifted));
405 }
406 shifted_counter += component_size;
407
408 // add value into result
409 value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
410
411 // once a full 32-bit word has been packed, save it into a temporary
412 if (shifted_counter >= 32) {
413 SetTemporary(bb, indexer++, std::move(value));
414 // reset the counter and value to start packing the next word
415 value = Immediate(0);
416 shifted_counter = 0;
417 }
418 }
419 for (u32 i = 0; i < indexer; ++i) {
420 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
421 }
422 break;
423 }
424 default:
425 UNREACHABLE();
426 break;
427 }
428 }
429 break;
430 }
431 case OpCode::Id::SUST: {
432 UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
433 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
434 Tegra::Shader::OutOfBoundsStore::Ignore);
435 UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA
436
437 std::vector<Node> values;
438 constexpr std::size_t hardcoded_size{4};
439 for (std::size_t i = 0; i < hardcoded_size; ++i) {
440 values.push_back(GetRegister(instr.gpr0.Value() + i));
441 }
442
443 const auto type{instr.suldst.image_type};
444 auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
445 : GetBindlessImage(instr.gpr39, type)};
446 image.MarkWrite();
447
448 MetaImage meta{image, std::move(values)};
449 bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
450 break;
451 }
452 case OpCode::Id::SUATOM: {
453 UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
454
455 const OperationCode operation_code = [instr] {
456 switch (instr.suatom_d.operation_type) {
457 case Tegra::Shader::ImageAtomicOperationType::S32:
458 case Tegra::Shader::ImageAtomicOperationType::U32:
459 switch (instr.suatom_d.operation) {
460 case Tegra::Shader::ImageAtomicOperation::Add:
461 return OperationCode::AtomicImageAdd;
462 case Tegra::Shader::ImageAtomicOperation::And:
463 return OperationCode::AtomicImageAnd;
464 case Tegra::Shader::ImageAtomicOperation::Or:
465 return OperationCode::AtomicImageOr;
466 case Tegra::Shader::ImageAtomicOperation::Xor:
467 return OperationCode::AtomicImageXor;
468 case Tegra::Shader::ImageAtomicOperation::Exch:
469 return OperationCode::AtomicImageExchange;
470 default:
471 break;
472 }
473 break;
474 default:
475 break;
476 }
477 UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
478 static_cast<u64>(instr.suatom_d.operation.Value()),
479 static_cast<u64>(instr.suatom_d.operation_type.Value()));
480 return OperationCode::AtomicImageAdd;
481 }();
482
483 Node value = GetRegister(instr.gpr0);
484
485 const auto type = instr.suatom_d.image_type;
486 auto& image = GetImage(instr.image, type);
487 image.MarkAtomic();
488
489 MetaImage meta{image, {std::move(value)}};
490 SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
491 break;
492 }
493 default:
494 UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
495 }
496
497 return pc;
498}
499
500ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
501 const auto offset = static_cast<u32>(image.index.Value());
502
503 const auto it =
504 std::find_if(std::begin(used_images), std::end(used_images),
505 [offset](const ImageEntry& entry) { return entry.offset == offset; });
506 if (it != std::end(used_images)) {
507 ASSERT(!it->is_bindless && it->type == type);
508 return *it;
509 }
510
511 const auto next_index = static_cast<u32>(used_images.size());
512 return used_images.emplace_back(next_index, offset, type);
513}
514
515ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
516 const Node image_register = GetRegister(reg);
517 const auto result =
518 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
519
520 const auto buffer = std::get<1>(result);
521 const auto offset = std::get<2>(result);
522
523 const auto it = std::find_if(std::begin(used_images), std::end(used_images),
524 [buffer, offset](const ImageEntry& entry) {
525 return entry.buffer == buffer && entry.offset == offset;
526 });
527 if (it != std::end(used_images)) {
528 ASSERT(it->is_bindless && it->type == type);
529 return *it;
530 }
531
532 const auto next_index = static_cast<u32>(used_images.size());
533 return used_images.emplace_back(next_index, offset, buffer, type);
534}
535
536} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
deleted file mode 100644
index 59809bcd8..000000000
--- a/src/video_core/shader/decode/integer_set.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "video_core/engines/shader_bytecode.h"
7#include "video_core/shader/node_helper.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17
18 const Node op_a = GetRegister(instr.gpr8);
19 const Node op_b = [&]() {
20 if (instr.is_b_imm) {
21 return Immediate(instr.alu.GetSignedImm20_20());
22 } else if (instr.is_b_gpr) {
23 return GetRegister(instr.gpr20);
24 } else {
25 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
26 }
27 }();
28
29 // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
30 // is true, and to 0 otherwise.
31 const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
32 const Node first_pred =
33 GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
34
35 const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
36
37 const Node predicate = Operation(combiner, first_pred, second_pred);
38
39 const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
40 const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
41 const Node value =
42 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
43
44 SetRegister(bb, instr.gpr0, value);
45
46 return pc;
47}
48
49} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
deleted file mode 100644
index 25e48fef8..000000000
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19
20 const Node op_a = GetRegister(instr.gpr8);
21
22 const Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return Immediate(instr.alu.GetSignedImm20_20());
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 // We can't use the constant predicate as destination.
33 ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
34
35 const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
36 const Node predicate =
37 GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
38
39 // Set the primary predicate to the result of Predicate OP SecondPredicate
40 const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
41 const Node value = Operation(combiner, predicate, second_pred);
42 SetPredicate(bb, instr.isetp.pred3, value);
43
44 if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
45 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
46 const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
47 SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
48 }
49
50 return pc;
51}
52
53} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
deleted file mode 100644
index 7728f600e..000000000
--- a/src/video_core/shader/decode/memory.cpp
+++ /dev/null
@@ -1,493 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7#include <vector>
8
9#include <fmt/format.h>
10
11#include "common/alignment.h"
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "video_core/engines/shader_bytecode.h"
16#include "video_core/shader/node_helper.h"
17#include "video_core/shader/shader_ir.h"
18
19namespace VideoCommon::Shader {
20
21using std::move;
22using Tegra::Shader::AtomicOp;
23using Tegra::Shader::AtomicType;
24using Tegra::Shader::Attribute;
25using Tegra::Shader::GlobalAtomicType;
26using Tegra::Shader::Instruction;
27using Tegra::Shader::OpCode;
28using Tegra::Shader::Register;
29using Tegra::Shader::StoreType;
30
31namespace {
32
33OperationCode GetAtomOperation(AtomicOp op) {
34 switch (op) {
35 case AtomicOp::Add:
36 return OperationCode::AtomicIAdd;
37 case AtomicOp::Min:
38 return OperationCode::AtomicIMin;
39 case AtomicOp::Max:
40 return OperationCode::AtomicIMax;
41 case AtomicOp::And:
42 return OperationCode::AtomicIAnd;
43 case AtomicOp::Or:
44 return OperationCode::AtomicIOr;
45 case AtomicOp::Xor:
46 return OperationCode::AtomicIXor;
47 case AtomicOp::Exch:
48 return OperationCode::AtomicIExchange;
49 default:
50 UNIMPLEMENTED_MSG("op={}", op);
51 return OperationCode::AtomicIAdd;
52 }
53}
54
55bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
56 return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
57 uniform_type == Tegra::Shader::UniformType::UnsignedShort;
58}
59
60u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
61 switch (uniform_type) {
62 case Tegra::Shader::UniformType::UnsignedByte:
63 return 0b11;
64 case Tegra::Shader::UniformType::UnsignedShort:
65 return 0b10;
66 default:
67 UNREACHABLE();
68 return 0;
69 }
70}
71
72u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
73 switch (uniform_type) {
74 case Tegra::Shader::UniformType::UnsignedByte:
75 return 8;
76 case Tegra::Shader::UniformType::UnsignedShort:
77 return 16;
78 case Tegra::Shader::UniformType::Single:
79 return 32;
80 case Tegra::Shader::UniformType::Double:
81 return 64;
82 case Tegra::Shader::UniformType::Quad:
83 case Tegra::Shader::UniformType::UnsignedQuad:
84 return 128;
85 default:
86 UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
87 return 32;
88 }
89}
90
91Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
92 Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
93 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
94 return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
95}
96
97Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
98 Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
99 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
100 return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
101 Immediate(size));
102}
103
104Node Sign16Extend(Node value) {
105 Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
106 Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
107 Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
108 return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
109}
110
111} // Anonymous namespace
112
113u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
114 const Instruction instr = {program_code[pc]};
115 const auto opcode = OpCode::Decode(instr);
116
117 switch (opcode->get().GetId()) {
118 case OpCode::Id::LD_A: {
119 // Note: Shouldn't this be interp mode flat? That is, no interpolation is performed.
120 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
121 "Indirect attribute loads are not supported");
122 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
123 "Unaligned attribute loads are not supported");
124 UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
125 instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
126 "Non-32 bits PHYS reads are not implemented");
127
128 const Node buffer{GetRegister(instr.gpr39)};
129
130 u64 next_element = instr.attribute.fmt20.element;
131 auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
132
133 const auto LoadNextElement = [&](u32 reg_offset) {
134 const Node attribute{instr.attribute.fmt20.IsPhysical()
135 ? GetPhysicalInputAttribute(instr.gpr8, buffer)
136 : GetInputAttribute(static_cast<Attribute::Index>(next_index),
137 next_element, buffer)};
138
139 SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
140
141 // Load the next attribute element into the following register. If the element
142 // to load goes beyond the vec4 size, load the first element of the next
143 // attribute.
144 next_element = (next_element + 1) % 4;
145 next_index = next_index + (next_element == 0 ? 1 : 0);
146 };
147
148 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
149 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
150 LoadNextElement(reg_offset);
151 }
152 break;
153 }
154 case OpCode::Id::LD_C: {
155 UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
156
157 Node index = GetRegister(instr.gpr8);
158
159 const Node op_a =
160 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
161
162 switch (instr.ld_c.type.Value()) {
163 case Tegra::Shader::UniformType::Single:
164 SetRegister(bb, instr.gpr0, op_a);
165 break;
166
167 case Tegra::Shader::UniformType::Double: {
168 const Node op_b =
169 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
170
171 SetTemporary(bb, 0, op_a);
172 SetTemporary(bb, 1, op_b);
173 SetRegister(bb, instr.gpr0, GetTemporary(0));
174 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
175 break;
176 }
177 default:
178 UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
179 }
180 break;
181 }
182 case OpCode::Id::LD_L:
183 LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
184 [[fallthrough]];
185 case OpCode::Id::LD_S: {
186 const auto GetAddress = [&](s32 offset) {
187 ASSERT(offset % 4 == 0);
188 const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
189 return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
190 };
191 const auto GetMemory = [&](s32 offset) {
192 return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
193 : GetLocalMemory(GetAddress(offset));
194 };
195
196 switch (instr.ldst_sl.type.Value()) {
197 case StoreType::Signed16:
198 SetRegister(bb, instr.gpr0,
199 Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
200 break;
201 case StoreType::Bits32:
202 case StoreType::Bits64:
203 case StoreType::Bits128: {
204 const u32 count = [&] {
205 switch (instr.ldst_sl.type.Value()) {
206 case StoreType::Bits32:
207 return 1;
208 case StoreType::Bits64:
209 return 2;
210 case StoreType::Bits128:
211 return 4;
212 default:
213 UNREACHABLE();
214 return 0;
215 }
216 }();
217 for (u32 i = 0; i < count; ++i) {
218 SetTemporary(bb, i, GetMemory(i * 4));
219 }
220 for (u32 i = 0; i < count; ++i) {
221 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
222 }
223 break;
224 }
225 default:
226 UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
227 instr.ldst_sl.type.Value());
228 }
229 break;
230 }
231 case OpCode::Id::LD:
232 case OpCode::Id::LDG: {
233 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
234 switch (opcode->get().GetId()) {
235 case OpCode::Id::LD:
236 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
237 return instr.generic.type;
238 case OpCode::Id::LDG:
239 return instr.ldg.type;
240 default:
241 UNREACHABLE();
242 return {};
243 }
244 }();
245
246 const auto [real_address_base, base_address, descriptor] =
247 TrackGlobalMemory(bb, instr, true, false);
248
249 const u32 size = GetMemorySize(type);
250 const u32 count = Common::AlignUp(size, 32) / 32;
251 if (!real_address_base || !base_address) {
252 // Tracking failed, load zeroes.
253 for (u32 i = 0; i < count; ++i) {
254 SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
255 }
256 break;
257 }
258
259 for (u32 i = 0; i < count; ++i) {
260 const Node it_offset = Immediate(i * 4);
261 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
262 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
263
264 // To handle unaligned loads, get the bytes used to dereference global memory and extract
265 // those bytes from the loaded u32.
266 if (IsUnaligned(type)) {
267 gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
268 }
269
270 SetTemporary(bb, i, gmem);
271 }
272
273 for (u32 i = 0; i < count; ++i) {
274 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
275 }
276 break;
277 }
278 case OpCode::Id::ST_A: {
279 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
280 "Indirect attribute loads are not supported");
281 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
282 "Unaligned attribute loads are not supported");
283
284 u64 element = instr.attribute.fmt20.element;
285 auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
286
287 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
288 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
289 Node dest;
290 if (instr.attribute.fmt20.patch) {
291 const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
292 dest = MakeNode<PatchNode>(offset);
293 } else {
294 dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
295 GetRegister(instr.gpr39));
296 }
297 const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
298
299 bb.push_back(Operation(OperationCode::Assign, dest, src));
300
301 // Load the next attribute element into the following register. If the element to load
302 // goes beyond the vec4 size, load the first element of the next attribute.
303 element = (element + 1) % 4;
304 index = index + (element == 0 ? 1 : 0);
305 }
306 break;
307 }
308 case OpCode::Id::ST_L:
309 LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
310 [[fallthrough]];
311 case OpCode::Id::ST_S: {
312 const auto GetAddress = [&](s32 offset) {
313 ASSERT(offset % 4 == 0);
314 const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
315 return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
316 };
317
318 const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
319 const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
320 const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
321
322 switch (instr.ldst_sl.type.Value()) {
323 case StoreType::Bits128:
324 (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
325 (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
326 [[fallthrough]];
327 case StoreType::Bits64:
328 (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
329 [[fallthrough]];
330 case StoreType::Bits32:
331 (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
332 break;
333 case StoreType::Unsigned16:
334 case StoreType::Signed16: {
335 Node address = GetAddress(0);
336 Node memory = (this->*get_memory)(address);
337 (this->*set_memory)(
338 bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
339 break;
340 }
341 default:
342 UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
343 instr.ldst_sl.type.Value());
344 }
345 break;
346 }
347 case OpCode::Id::ST:
348 case OpCode::Id::STG: {
349 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
350 switch (opcode->get().GetId()) {
351 case OpCode::Id::ST:
352 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
353 return instr.generic.type;
354 case OpCode::Id::STG:
355 return instr.stg.type;
356 default:
357 UNREACHABLE();
358 return {};
359 }
360 }();
361
362 // For unaligned reads we have to read memory too.
363 const bool is_read = IsUnaligned(type);
364 const auto [real_address_base, base_address, descriptor] =
365 TrackGlobalMemory(bb, instr, is_read, true);
366 if (!real_address_base || !base_address) {
367 // Tracking failed, skip the store.
368 break;
369 }
370
371 const u32 size = GetMemorySize(type);
372 const u32 count = Common::AlignUp(size, 32) / 32;
373 for (u32 i = 0; i < count; ++i) {
374 const Node it_offset = Immediate(i * 4);
375 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
376 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
377 Node value = GetRegister(instr.gpr0.Value() + i);
378
379 if (IsUnaligned(type)) {
380 const u32 mask = GetUnalignedMask(type);
381 value = InsertUnaligned(gmem, move(value), real_address, mask, size);
382 }
383
384 bb.push_back(Operation(OperationCode::Assign, gmem, value));
385 }
386 break;
387 }
388 case OpCode::Id::RED: {
389 UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
390 instr.red.type.Value());
391 const auto [real_address, base_address, descriptor] =
392 TrackGlobalMemory(bb, instr, true, true);
393 if (!real_address || !base_address) {
394 // Tracking failed, skip atomic.
395 break;
396 }
397 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
398 Node value = GetRegister(instr.gpr0);
399 bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
400 break;
401 }
402 case OpCode::Id::ATOM: {
403 UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
404 instr.atom.operation == AtomicOp::Dec ||
405 instr.atom.operation == AtomicOp::SafeAdd,
406 "operation={}", instr.atom.operation.Value());
407 UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
408 instr.atom.type == GlobalAtomicType::U64 ||
409 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
410 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
411 "type={}", instr.atom.type.Value());
412
413 const auto [real_address, base_address, descriptor] =
414 TrackGlobalMemory(bb, instr, true, true);
415 if (!real_address || !base_address) {
416 // Tracking failed, skip atomic.
417 break;
418 }
419
420 const bool is_signed =
421 instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
422 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
423 SetRegister(bb, instr.gpr0,
424 SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
425 GetRegister(instr.gpr20)));
426 break;
427 }
428 case OpCode::Id::ATOMS: {
429 UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
430 instr.atoms.operation == AtomicOp::Dec,
431 "operation={}", instr.atoms.operation.Value());
432 UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
433 instr.atoms.type == AtomicType::U64,
434 "type={}", instr.atoms.type.Value());
435 const bool is_signed =
436 instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
437 const s32 offset = instr.atoms.GetImmediateOffset();
438 Node address = GetRegister(instr.gpr8);
439 address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
440 SetRegister(bb, instr.gpr0,
441 SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
442 GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
443 break;
444 }
445 case OpCode::Id::AL2P: {
446 // Ignore al2p.direction since we don't care about it.
447
448 // Calculate emulation fake physical address.
449 const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
450 const Node reg{GetRegister(instr.gpr8)};
451 const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
452
453 // Set the fake address to target register.
454 SetRegister(bb, instr.gpr0, fake_address);
455
456 // Signal the shader IR to declare all possible attributes and varyings
457 uses_physical_attributes = true;
458 break;
459 }
460 default:
461 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
462 }
463
464 return pc;
465}
466
467std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
468 Instruction instr,
469 bool is_read, bool is_write) {
470 const auto addr_register{GetRegister(instr.gmem.gpr)};
471 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
472
473 const auto [base_address, index, offset] =
474 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
475 ASSERT_OR_EXECUTE_MSG(
476 base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
477 "Global memory tracking failed");
478
479 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
480
481 const GlobalMemoryBase descriptor{index, offset};
482 const auto& entry = used_global_memory.try_emplace(descriptor).first;
483 auto& usage = entry->second;
484 usage.is_written |= is_write;
485 usage.is_read |= is_read;
486
487 const auto real_address =
488 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
489
490 return {real_address, base_address, descriptor};
491}
492
493} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
deleted file mode 100644
index 5f88537bc..000000000
--- a/src/video_core/shader/decode/other.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using std::move;
15using Tegra::Shader::ConditionCode;
16using Tegra::Shader::Instruction;
17using Tegra::Shader::IpaInterpMode;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::PixelImap;
20using Tegra::Shader::Register;
21using Tegra::Shader::SystemVariable;
22
23using Index = Tegra::Shader::Attribute::Index;
24
25u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
26 const Instruction instr = {program_code[pc]};
27 const auto opcode = OpCode::Decode(instr);
28
29 switch (opcode->get().GetId()) {
30 case OpCode::Id::NOP: {
31 UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
32 UNIMPLEMENTED_IF(instr.nop.trigger != 0);
33 // With the previous preconditions, this instruction is a no-operation.
34 break;
35 }
36 case OpCode::Id::EXIT: {
37 const ConditionCode cc = instr.flow_condition_code;
38 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
39
40 switch (instr.flow.cond) {
41 case Tegra::Shader::FlowCondition::Always:
42 bb.push_back(Operation(OperationCode::Exit));
43 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
44 // If this is an unconditional exit then just end processing here,
45 // otherwise we have to account for the possibility of the condition
46 // not being met, so continue processing the next instruction.
47 pc = MAX_PROGRAM_LENGTH - 1;
48 }
49 break;
50
51 case Tegra::Shader::FlowCondition::Fcsm_Tr:
52 // TODO(bunnei): What is this used for? If we assume this condition is not
53 // satisfied, dual vertex shaders in Farming Simulator make more sense
54 UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
55 break;
56
57 default:
58 UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
59 }
60 break;
61 }
62 case OpCode::Id::KIL: {
63 UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
64
65 const ConditionCode cc = instr.flow_condition_code;
66 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
67
68 bb.push_back(Operation(OperationCode::Discard));
69 break;
70 }
71 case OpCode::Id::S2R: {
72 const Node value = [this, instr] {
73 switch (instr.sys20) {
74 case SystemVariable::LaneId:
75 return Operation(OperationCode::ThreadId);
76 case SystemVariable::InvocationId:
77 return Operation(OperationCode::InvocationId);
78 case SystemVariable::Ydirection:
79 uses_y_negate = true;
80 return Operation(OperationCode::YNegate);
81 case SystemVariable::InvocationInfo:
82 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
83 return Immediate(0x00ff'0000U);
84 case SystemVariable::WscaleFactorXY:
85 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
86 return Immediate(0U);
87 case SystemVariable::WscaleFactorZ:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
89 return Immediate(0U);
90 case SystemVariable::Tid: {
91 Node val = Immediate(0);
92 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
93 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
94 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
95 return val;
96 }
97 case SystemVariable::TidX:
98 return Operation(OperationCode::LocalInvocationIdX);
99 case SystemVariable::TidY:
100 return Operation(OperationCode::LocalInvocationIdY);
101 case SystemVariable::TidZ:
102 return Operation(OperationCode::LocalInvocationIdZ);
103 case SystemVariable::CtaIdX:
104 return Operation(OperationCode::WorkGroupIdX);
105 case SystemVariable::CtaIdY:
106 return Operation(OperationCode::WorkGroupIdY);
107 case SystemVariable::CtaIdZ:
108 return Operation(OperationCode::WorkGroupIdZ);
109 case SystemVariable::EqMask:
110 case SystemVariable::LtMask:
111 case SystemVariable::LeMask:
112 case SystemVariable::GtMask:
113 case SystemVariable::GeMask:
114 uses_warps = true;
115 switch (instr.sys20) {
116 case SystemVariable::EqMask:
117 return Operation(OperationCode::ThreadEqMask);
118 case SystemVariable::LtMask:
119 return Operation(OperationCode::ThreadLtMask);
120 case SystemVariable::LeMask:
121 return Operation(OperationCode::ThreadLeMask);
122 case SystemVariable::GtMask:
123 return Operation(OperationCode::ThreadGtMask);
124 case SystemVariable::GeMask:
125 return Operation(OperationCode::ThreadGeMask);
126 default:
127 UNREACHABLE();
128 return Immediate(0u);
129 }
130 default:
131 UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
132 return Immediate(0u);
133 }
134 }();
135 SetRegister(bb, instr.gpr0, value);
136
137 break;
138 }
139 case OpCode::Id::BRA: {
140 Node branch;
141 if (instr.bra.constant_buffer == 0) {
142 const u32 target = pc + instr.bra.GetBranchTarget();
143 branch = Operation(OperationCode::Branch, Immediate(target));
144 } else {
145 const u32 target = pc + 1;
146 const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
147 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
148 PRECISE, op_a, Immediate(3));
149 const Node operand =
150 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
151 branch = Operation(OperationCode::BranchIndirect, operand);
152 }
153
154 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
155 if (cc != Tegra::Shader::ConditionCode::T) {
156 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
157 } else {
158 bb.push_back(branch);
159 }
160 break;
161 }
162 case OpCode::Id::BRX: {
163 Node operand;
164 if (instr.brx.constant_buffer != 0) {
165 const s32 target = pc + 1;
166 const Node index = GetRegister(instr.gpr8);
167 const Node op_a =
168 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
169 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
170 PRECISE, op_a, Immediate(3));
171 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
172 } else {
173 const s32 target = pc + instr.brx.GetBranchExtend();
174 const Node op_a = GetRegister(instr.gpr8);
175 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
176 PRECISE, op_a, Immediate(3));
177 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
178 }
179 const Node branch = Operation(OperationCode::BranchIndirect, operand);
180
181 const ConditionCode cc = instr.flow_condition_code;
182 if (cc != ConditionCode::T) {
183 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
184 } else {
185 bb.push_back(branch);
186 }
187 break;
188 }
189 case OpCode::Id::SSY: {
190 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
191 "Constant buffer flow is not supported");
192
193 if (disable_flow_stack) {
194 break;
195 }
196
197 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
198 const u32 target = pc + instr.bra.GetBranchTarget();
199 bb.push_back(
200 Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
201 break;
202 }
203 case OpCode::Id::PBK: {
204 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
205 "Constant buffer PBK is not supported");
206
207 if (disable_flow_stack) {
208 break;
209 }
210
211 // PBK pushes to a stack the address where BRK will jump to.
212 const u32 target = pc + instr.bra.GetBranchTarget();
213 bb.push_back(
214 Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
215 break;
216 }
217 case OpCode::Id::SYNC: {
218 const ConditionCode cc = instr.flow_condition_code;
219 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
220
221 if (decompiled) {
222 break;
223 }
224
225 // The SYNC opcode jumps to the address previously set by the SSY opcode
226 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
227 break;
228 }
229 case OpCode::Id::BRK: {
230 const ConditionCode cc = instr.flow_condition_code;
231 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
232 if (decompiled) {
233 break;
234 }
235
236 // The BRK opcode jumps to the address previously set by the PBK opcode
237 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
238 break;
239 }
240 case OpCode::Id::IPA: {
241 const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
242 const auto attribute = instr.attribute.fmt28;
243 const Index index = attribute.index;
244
245 Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
246 : GetInputAttribute(index, attribute.element);
247
248 // Code taken from Ryujinx.
249 if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
250 const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
251 if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
252 Node position_w = GetInputAttribute(Index::Position, 3);
253 value = Operation(OperationCode::FMul, move(value), move(position_w));
254 }
255 }
256
257 if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
258 value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
259 }
260
261 value = GetSaturatedFloat(move(value), instr.ipa.saturate);
262
263 SetRegister(bb, instr.gpr0, move(value));
264 break;
265 }
266 case OpCode::Id::OUT_R: {
267 UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
268 "Stream buffer is not supported");
269
270 if (instr.out.emit) {
271 // gpr0 is used to store the next address and gpr8 contains the address to emit.
272 // Hardware uses pointers here, but we simply ignore them
273 bb.push_back(Operation(OperationCode::EmitVertex));
274 SetRegister(bb, instr.gpr0, Immediate(0));
275 }
276 if (instr.out.cut) {
277 bb.push_back(Operation(OperationCode::EndPrimitive));
278 }
279 break;
280 }
281 case OpCode::Id::ISBERD: {
282 UNIMPLEMENTED_IF(instr.isberd.o != 0);
283 UNIMPLEMENTED_IF(instr.isberd.skew != 0);
284 UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
285 UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
286 LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
287 SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
288 break;
289 }
290 case OpCode::Id::BAR: {
291 UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
292 bb.push_back(Operation(OperationCode::Barrier));
293 break;
294 }
295 case OpCode::Id::MEMBAR: {
296 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
297 const OperationCode type = [instr] {
298 switch (instr.membar.type) {
299 case Tegra::Shader::MembarType::CTA:
300 return OperationCode::MemoryBarrierGroup;
301 case Tegra::Shader::MembarType::GL:
302 return OperationCode::MemoryBarrierGlobal;
303 default:
304 UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
305 return OperationCode::MemoryBarrierGlobal;
306 }
307 }();
308 bb.push_back(Operation(type));
309 break;
310 }
311 case OpCode::Id::DEPBAR: {
312 LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
313 break;
314 }
315 default:
316 UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
317 }
318
319 return pc;
320}
321
322} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
deleted file mode 100644
index 9290d22eb..000000000
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 switch (opcode->get().GetId()) {
22 case OpCode::Id::PSETP: {
23 const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
24 const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
25
26 // We can't use the constant predicate as destination.
27 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
28
29 const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
30
31 const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
32 const Node predicate = Operation(combiner, op_a, op_b);
33
34 // Set the primary predicate to the result of Predicate OP SecondPredicate
35 SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
36
37 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
38 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
39 // enabled
40 SetPredicate(bb, instr.psetp.pred0,
41 Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
42 second_pred));
43 }
44 break;
45 }
46 case OpCode::Id::CSETP: {
47 const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
48 const Node condition_code = GetConditionCode(instr.csetp.cc);
49
50 const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
51
52 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
53 SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
54 }
55 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
56 const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
57 SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
58 }
59 break;
60 }
61 default:
62 UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
63 }
64
65 return pc;
66}
67
68} // namespace VideoCommon::Shader
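For reference, once the predicate reads above are resolved, PSETP reduces to plain boolean logic: the two source predicates are combined, the primary destination receives `Predicate OP SecondPredicate`, and the optional secondary destination receives `!Predicate OP SecondPredicate`. A minimal standalone sketch of that reduction (hypothetical helper name, plain bools instead of IR nodes, not part of the deleted sources):

#include <cstdio>
#include <functional>

// Hedged sketch of the PSETP combination: returns the primary result and writes
// the secondary one through the reference parameter.
bool CombinePsetp(bool pred12, bool pred29, bool pred39,
                  const std::function<bool(bool, bool)>& combiner, bool& secondary) {
    const bool predicate = combiner(pred12, pred29); // Predicate = pred12 OP pred29
    secondary = combiner(!predicate, pred39);        // !Predicate OP SecondPredicate
    return combiner(predicate, pred39);              // Predicate OP SecondPredicate
}

int main() {
    bool secondary = false;
    const bool primary =
        CombinePsetp(true, false, true, [](bool a, bool b) { return a && b; }, secondary);
    std::printf("primary=%d secondary=%d\n", primary, secondary);
}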
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
deleted file mode 100644
index 84dbc50fe..000000000
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18
19 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
20 "Condition codes generation in PSET is not implemented");
21
22 const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
23 const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
24 const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);
25
26 const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
27
28 const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
29 const Node predicate = Operation(combiner, first_pred, second_pred);
30
31 const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
32 const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
33 const Node value =
34 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
35
36 if (instr.pset.bf) {
37 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
38 } else {
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 }
41 SetRegister(bb, instr.gpr0, value);
42
43 return pc;
44}
45
46} // namespace VideoCommon::Shader
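The BF selection above is the only subtle part of PSET: the combined predicate is materialized either as a float 1.0/0.0 or as an integer all-ones/zero mask. A small hedged sketch of just that selection, with a hypothetical helper name:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Hedged sketch of the PSET result selection: with BF set the destination register
// holds float 1.0/0.0, otherwise an integer all-ones/zero mask.
uint32_t PsetResult(bool predicate, bool bf) {
    if (bf) {
        const float value = predicate ? 1.0f : 0.0f;
        uint32_t bits = 0;
        std::memcpy(&bits, &value, sizeof(bits)); // Raw bits as seen in the register
        return bits;
    }
    return predicate ? 0xffffffffu : 0u;
}

int main() {
    std::printf("0x%08x 0x%08x\n", PsetResult(true, true), PsetResult(true, false));
    // prints "0x3f800000 0xffffffff"
}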
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
deleted file mode 100644
index 6116c31aa..000000000
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/node_helper.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace VideoCommon::Shader {
14
15using std::move;
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18
19namespace {
20constexpr u64 NUM_CONDITION_CODES = 4;
21constexpr u64 NUM_PREDICATES = 7;
22} // namespace
23
24u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
25 const Instruction instr = {program_code[pc]};
26 const auto opcode = OpCode::Decode(instr);
27
28 Node apply_mask = [this, opcode, instr] {
29 switch (opcode->get().GetId()) {
30 case OpCode::Id::R2P_IMM:
31 case OpCode::Id::P2R_IMM:
32 return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
33 default:
34 UNREACHABLE();
35 return Immediate(0);
36 }
37 }();
38
39 const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
40
41 const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
42 const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
43 const auto get_entry = [this, cc](u64 entry) {
44 return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
45 };
46
47 switch (opcode->get().GetId()) {
48 case OpCode::Id::R2P_IMM: {
49 Node mask = GetRegister(instr.gpr8);
50
51 for (u64 entry = 0; entry < num_entries; ++entry) {
52 const u32 shift = static_cast<u32>(entry);
53
54 Node apply = BitfieldExtract(apply_mask, shift, 1);
55 Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
56
57 Node compare = BitfieldExtract(mask, offset + shift, 1);
58 Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
59
60 Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
61 bb.push_back(Conditional(condition, {move(code)}));
62 }
63 break;
64 }
65 case OpCode::Id::P2R_IMM: {
66 Node value = Immediate(0);
67 for (u64 entry = 0; entry < num_entries; ++entry) {
68 Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
69 Immediate(0));
70 value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
71 }
72 value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
73 value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
74
75 SetRegister(bb, instr.gpr0, move(value));
76 break;
77 }
78 default:
79        UNIMPLEMENTED_MSG("Unhandled P2R/R2P instruction: {}", opcode->get().GetName());
80 break;
81 }
82
83 return pc;
84}
85
86} // namespace VideoCommon::Shader
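The R2P_IMM path above moves bits of a general-purpose register into the predicate file: `byte` selects which byte of the source register is read, and the immediate mask gates which predicates are actually overwritten. A minimal standalone sketch of that packing (hypothetical names, scalar booleans instead of IR nodes):

#include <array>
#include <cstdint>
#include <cstdio>

// Hedged sketch of R2P_IMM: each predicate occupies one bit of a byte inside the
// source register; 'byte' selects that byte and 'apply_mask' selects which of the
// seven user predicates get overwritten.
void R2pImm(uint32_t source_reg, uint32_t apply_mask, uint32_t byte,
            std::array<bool, 7>& predicates) {
    const uint32_t offset = byte * 8;
    for (uint32_t entry = 0; entry < predicates.size(); ++entry) {
        if (((apply_mask >> entry) & 1) == 0) {
            continue; // This predicate is masked off and keeps its old value
        }
        predicates[entry] = ((source_reg >> (offset + entry)) & 1) != 0;
    }
}

int main() {
    std::array<bool, 7> preds{};
    R2pImm(0x00002d00, 0x2f, 1, preds); // Read predicate bits from byte 1
    for (const bool p : preds) {
        std::printf("%d ", p ? 1 : 0);
    }
    std::printf("\n"); // prints "1 0 1 1 0 1 0"
}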
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
deleted file mode 100644
index a53819c15..000000000
--- a/src/video_core/shader/decode/shift.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using std::move;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::ShfType;
17using Tegra::Shader::ShfXmode;
18
19namespace {
20
21Node IsFull(Node shift) {
22 return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32));
23}
24
25Node Shift(OperationCode opcode, Node value, Node shift) {
26 Node shifted = Operation(opcode, move(value), shift);
27 return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
28}
29
30Node ClampShift(Node shift, s32 size = 32) {
31 shift = Operation(OperationCode::IMax, move(shift), Immediate(0));
32 return Operation(OperationCode::IMin, move(shift), Immediate(size));
33}
34
35Node WrapShift(Node shift, s32 size = 32) {
36 return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1));
37}
38
39Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
40 // These values are used when the shift value is less than 32
41 Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
42 Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
43 Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));
44
45 if (type == ShfType::Bits32) {
46 // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
47 return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
48 }
49
50    // And these when the shift amount is 32 or larger
51 const bool is_signed = type == ShfType::S64;
52 const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
53 Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
54 Node greater = Shift(opcode, high, move(reduced));
55
56 Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
57 Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
58
59 Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
60 return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
61}
62
63Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
64 // These values are used when the shift value is less than 32
65 Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift);
66 Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
67 Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));
68
69 if (type == ShfType::Bits32) {
70 // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
71 return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
72 }
73
74    // And these when the shift amount is 32 or larger
75 Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
76 Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));
77
78 Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
79 Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
80
81 Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
82 return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
83}
84
85} // Anonymous namespace
86
87u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
88 const Instruction instr = {program_code[pc]};
89 const auto opcode = OpCode::Decode(instr);
90
91 Node op_a = GetRegister(instr.gpr8);
92 Node op_b = [this, instr] {
93 if (instr.is_b_imm) {
94 return Immediate(instr.alu.GetSignedImm20_20());
95 } else if (instr.is_b_gpr) {
96 return GetRegister(instr.gpr20);
97 } else {
98 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
99 }
100 }();
101
102 switch (const auto opid = opcode->get().GetId(); opid) {
103 case OpCode::Id::SHR_C:
104 case OpCode::Id::SHR_R:
105 case OpCode::Id::SHR_IMM: {
106 op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b));
107
108 Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
109 move(op_a), move(op_b));
110 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
111 SetRegister(bb, instr.gpr0, move(value));
112 break;
113 }
114 case OpCode::Id::SHL_C:
115 case OpCode::Id::SHL_R:
116 case OpCode::Id::SHL_IMM: {
117 Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
118 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
119 SetRegister(bb, instr.gpr0, move(value));
120 break;
121 }
122 case OpCode::Id::SHF_RIGHT_R:
123 case OpCode::Id::SHF_RIGHT_IMM:
124 case OpCode::Id::SHF_LEFT_R:
125 case OpCode::Id::SHF_LEFT_IMM: {
126 UNIMPLEMENTED_IF(instr.generates_cc);
127 UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
128 instr.shf.xmode.Value());
129
130 if (instr.is_b_imm) {
131 op_b = Immediate(static_cast<u32>(instr.shf.immediate));
132 }
133 const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
134 Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size);
135
136 Node negated_shift = Operation(OperationCode::INegate, shift);
137 Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));
138
139 const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
140 Node value = (is_right ? ShiftRight : ShiftLeft)(
141 move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type);
142
143 SetRegister(bb, instr.gpr0, move(value));
144 break;
145 }
146 default:
147 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
148 }
149
150 return pc;
151}
152
153} // namespace VideoCommon::Shader
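The ShiftRight/ShiftLeft helpers above build a funnel shift out of 32-bit operations, with explicit Select chains for the shift amounts 0 and 32, since shifting a 32-bit value by its full width is not well defined in the generated shader code. A hedged sketch of the Bits32 right-shift path only, assuming the shift has already been clamped or wrapped to [0, 32] as in ClampShift/WrapShift (hypothetical helper name):

#include <cstdint>
#include <cstdio>

// Hedged sketch of SHF_RIGHT with ShfType::Bits32: {high:low} is treated as one
// 64-bit value, shifted right by 'shift', and the low 32 bits are returned. The
// explicit 0 and 32 cases mirror the Select chains in the deleted IR helpers.
uint32_t FunnelShiftRight32(uint32_t low, uint32_t high, uint32_t shift) {
    if (shift == 0) {
        return low; // (high << 32) is undefined in C++; the IR selects 0 for it
    }
    if (shift >= 32) {
        return high; // "Full" shift: only the high word remains
    }
    return (low >> shift) | (high << (32 - shift));
}

int main() {
    std::printf("0x%08x\n", FunnelShiftRight32(0xdeadbeef, 0x12345678, 8)); // 0x78deadbe
}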
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
deleted file mode 100644
index c69681e8d..000000000
--- a/src/video_core/shader/decode/texture.cpp
+++ /dev/null
@@ -1,935 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Instruction;
21using Tegra::Shader::OpCode;
22using Tegra::Shader::Register;
23using Tegra::Shader::TextureMiscMode;
24using Tegra::Shader::TextureProcessMode;
25using Tegra::Shader::TextureType;
26
27static std::size_t GetCoordCount(TextureType texture_type) {
28 switch (texture_type) {
29 case TextureType::Texture1D:
30 return 1;
31 case TextureType::Texture2D:
32 return 2;
33 case TextureType::Texture3D:
34 case TextureType::TextureCube:
35 return 3;
36 default:
37 UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
38 return 0;
39 }
40}
41
42u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
43 const Instruction instr = {program_code[pc]};
44 const auto opcode = OpCode::Decode(instr);
45 bool is_bindless = false;
46 switch (opcode->get().GetId()) {
47 case OpCode::Id::TEX: {
48 const TextureType texture_type{instr.tex.texture_type};
49 const bool is_array = instr.tex.array != 0;
50 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
51 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
52 const auto process_mode = instr.tex.GetTextureProcessMode();
53 WriteTexInstructionFloat(
54 bb, instr,
55 GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
56 break;
57 }
58 case OpCode::Id::TEX_B: {
59 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
60 "AOFFI is not implemented");
61
62 const TextureType texture_type{instr.tex_b.texture_type};
63 const bool is_array = instr.tex_b.array != 0;
64 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
65 const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
66 const auto process_mode = instr.tex_b.GetTextureProcessMode();
67 WriteTexInstructionFloat(bb, instr,
68 GetTexCode(instr, texture_type, process_mode, depth_compare,
69 is_array, is_aoffi, {instr.gpr20}));
70 break;
71 }
72 case OpCode::Id::TEXS: {
73 const TextureType texture_type{instr.texs.GetTextureType()};
74 const bool is_array{instr.texs.IsArrayTexture()};
75 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
76 const auto process_mode = instr.texs.GetTextureProcessMode();
77
78 const Node4 components =
79 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
80
81 if (instr.texs.fp32_flag) {
82 WriteTexsInstructionFloat(bb, instr, components);
83 } else {
84 WriteTexsInstructionHalfFloat(bb, instr, components);
85 }
86 break;
87 }
88 case OpCode::Id::TLD4_B: {
89 is_bindless = true;
90 [[fallthrough]];
91 }
92 case OpCode::Id::TLD4: {
93 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
94 "NDV is not implemented");
95 const auto texture_type = instr.tld4.texture_type.Value();
96 const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
97 : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
98 const bool is_array = instr.tld4.array != 0;
99 const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
100 : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
101 const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
102 : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
103 WriteTexInstructionFloat(bb, instr,
104 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
105 is_ptp, is_bindless));
106 break;
107 }
108 case OpCode::Id::TLD4S: {
109 constexpr std::size_t num_coords = 2;
110 const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
111 const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
112 const Node op_a = GetRegister(instr.gpr8);
113 const Node op_b = GetRegister(instr.gpr20);
114
115 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
116 std::vector<Node> coords;
117 std::vector<Node> aoffi;
118 Node depth_compare;
119 if (is_depth_compare) {
120 // Note: TLD4S coordinate encoding works just like TEXS's
121 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
122 coords.push_back(op_a);
123 coords.push_back(op_y);
124 if (is_aoffi) {
125 aoffi = GetAoffiCoordinates(op_b, num_coords, true);
126 depth_compare = GetRegister(instr.gpr20.Value() + 1);
127 } else {
128 depth_compare = op_b;
129 }
130 } else {
131 // There's no depth compare
132 coords.push_back(op_a);
133 if (is_aoffi) {
134 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
135 aoffi = GetAoffiCoordinates(op_b, num_coords, true);
136 } else {
137 coords.push_back(op_b);
138 }
139 }
140 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
141
142 SamplerInfo info;
143 info.is_shadow = is_depth_compare;
144 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
145
146 Node4 values;
147 for (u32 element = 0; element < values.size(); ++element) {
148 MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {},
149 {}, {}, component, element, {}};
150 values[element] = Operation(OperationCode::TextureGather, meta, coords);
151 }
152
153 if (instr.tld4s.fp16_flag) {
154 WriteTexsInstructionHalfFloat(bb, instr, values, true);
155 } else {
156 WriteTexsInstructionFloat(bb, instr, values, true);
157 }
158 break;
159 }
160 case OpCode::Id::TXD_B:
161 is_bindless = true;
162 [[fallthrough]];
163 case OpCode::Id::TXD: {
164 UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
165 "AOFFI is not implemented");
166
167 const bool is_array = instr.txd.is_array != 0;
168 const auto derivate_reg = instr.gpr20.Value();
169 const auto texture_type = instr.txd.texture_type.Value();
170 const auto coord_count = GetCoordCount(texture_type);
171 u64 base_reg = instr.gpr8.Value();
172 Node index_var;
173 SamplerInfo info;
174 info.type = texture_type;
175 info.is_array = is_array;
176 const std::optional<SamplerEntry> sampler =
177 is_bindless ? GetBindlessSampler(base_reg, info, index_var)
178 : GetSampler(instr.sampler, info);
179 Node4 values;
180 if (!sampler) {
181 std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
182 WriteTexInstructionFloat(bb, instr, values);
183 break;
184 }
185
186 if (is_bindless) {
187 base_reg++;
188 }
189
190 std::vector<Node> coords;
191 std::vector<Node> derivates;
192 for (std::size_t i = 0; i < coord_count; ++i) {
193 coords.push_back(GetRegister(base_reg + i));
194 const std::size_t derivate = i * 2;
195 derivates.push_back(GetRegister(derivate_reg + derivate));
196 derivates.push_back(GetRegister(derivate_reg + derivate + 1));
197 }
198
199 Node array_node = {};
200 if (is_array) {
201 const Node info_reg = GetRegister(base_reg + coord_count);
202 array_node = BitfieldExtract(info_reg, 0, 16);
203 }
204
205 for (u32 element = 0; element < values.size(); ++element) {
206 MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
207 {}, {}, {}, element, index_var};
208 values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
209 }
210
211 WriteTexInstructionFloat(bb, instr, values);
212
213 break;
214 }
215 case OpCode::Id::TXQ_B:
216 is_bindless = true;
217 [[fallthrough]];
218 case OpCode::Id::TXQ: {
219 Node index_var;
220 const std::optional<SamplerEntry> sampler =
221 is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
222 : GetSampler(instr.sampler, {});
223
224 if (!sampler) {
225 u32 indexer = 0;
226 for (u32 element = 0; element < 4; ++element) {
227 if (!instr.txq.IsComponentEnabled(element)) {
228 continue;
229 }
230 const Node value = Immediate(0);
231 SetTemporary(bb, indexer++, value);
232 }
233 for (u32 i = 0; i < indexer; ++i) {
234 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
235 }
236 break;
237 }
238
239 u32 indexer = 0;
240 switch (instr.txq.query_type) {
241 case Tegra::Shader::TextureQueryType::Dimension: {
242 for (u32 element = 0; element < 4; ++element) {
243 if (!instr.txq.IsComponentEnabled(element)) {
244 continue;
245 }
246 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
247 const Node value =
248 Operation(OperationCode::TextureQueryDimensions, meta,
249 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
250 SetTemporary(bb, indexer++, value);
251 }
252 for (u32 i = 0; i < indexer; ++i) {
253 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
254 }
255 break;
256 }
257 default:
258 UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
259 }
260 break;
261 }
262 case OpCode::Id::TMML_B:
263 is_bindless = true;
264 [[fallthrough]];
265 case OpCode::Id::TMML: {
266 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
267 "NDV is not implemented");
268
269 const auto texture_type = instr.tmml.texture_type.Value();
270 const bool is_array = instr.tmml.array != 0;
271 SamplerInfo info;
272 info.type = texture_type;
273 info.is_array = is_array;
274 Node index_var;
275 const std::optional<SamplerEntry> sampler =
276 is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
277 : GetSampler(instr.sampler, info);
278
279 if (!sampler) {
280 u32 indexer = 0;
281 for (u32 element = 0; element < 2; ++element) {
282 if (!instr.tmml.IsComponentEnabled(element)) {
283 continue;
284 }
285 const Node value = Immediate(0);
286 SetTemporary(bb, indexer++, value);
287 }
288 for (u32 i = 0; i < indexer; ++i) {
289 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
290 }
291 break;
292 }
293
294 const u64 base_index = is_array ? 1 : 0;
295 const u64 num_components = [texture_type] {
296 switch (texture_type) {
297 case TextureType::Texture1D:
298 return 1;
299 case TextureType::Texture2D:
300 return 2;
301 case TextureType::TextureCube:
302 return 3;
303 default:
304 UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
305 return 2;
306 }
307 }();
308 // TODO: What's the array component used for?
309
310 std::vector<Node> coords;
311 coords.reserve(num_components);
312 for (u64 component = 0; component < num_components; ++component) {
313 coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
314 }
315
316 u32 indexer = 0;
317 for (u32 element = 0; element < 2; ++element) {
318 if (!instr.tmml.IsComponentEnabled(element)) {
319 continue;
320 }
321 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
322 Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
323 SetTemporary(bb, indexer++, std::move(value));
324 }
325 for (u32 i = 0; i < indexer; ++i) {
326 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
327 }
328 break;
329 }
330 case OpCode::Id::TLD: {
331 UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
332 UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
333 UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
334
335 WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
336 break;
337 }
338 case OpCode::Id::TLDS: {
339 const TextureType texture_type{instr.tlds.GetTextureType()};
340 const bool is_array{instr.tlds.IsArrayTexture()};
341
342 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
343 "AOFFI is not implemented");
344 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
345
346 const Node4 components = GetTldsCode(instr, texture_type, is_array);
347
348 if (instr.tlds.fp32_flag) {
349 WriteTexsInstructionFloat(bb, instr, components);
350 } else {
351 WriteTexsInstructionHalfFloat(bb, instr, components);
352 }
353 break;
354 }
355 default:
356        UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName());
357 }
358
359 return pc;
360}
361
362ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
363 SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
364 if (info.IsComplete()) {
365 return info;
366 }
367 if (!sampler) {
368 LOG_WARNING(HW_GPU, "Unknown sampler info");
369 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
370 info.is_array = info.is_array.value_or(false);
371 info.is_shadow = info.is_shadow.value_or(false);
372 info.is_buffer = info.is_buffer.value_or(false);
373 return info;
374 }
375 info.type = info.type.value_or(sampler->texture_type);
376 info.is_array = info.is_array.value_or(sampler->is_array != 0);
377 info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0);
378 info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0);
379 return info;
380}
381
382std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
383 SamplerInfo sampler_info) {
384 const u32 offset = static_cast<u32>(sampler.index.Value());
385 const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
386
387 // If this sampler has already been used, return the existing mapping.
388 const auto it =
389 std::find_if(used_samplers.begin(), used_samplers.end(),
390 [offset](const SamplerEntry& entry) { return entry.offset == offset; });
391 if (it != used_samplers.end()) {
392 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
393 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
394 return *it;
395 }
396
397 // Otherwise create a new mapping for this sampler
398 const auto next_index = static_cast<u32>(used_samplers.size());
399 return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array,
400 *info.is_shadow, *info.is_buffer, false);
401}
402
403std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
404 SamplerInfo info, Node& index_var) {
405 const Node sampler_register = GetRegister(reg);
406 const auto [base_node, tracked_sampler_info] =
407 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
408 if (!base_node) {
409 UNREACHABLE();
410 return std::nullopt;
411 }
412
413 if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
414 const u32 buffer = sampler_info->index;
415 const u32 offset = sampler_info->offset;
416 info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
417
418 // If this sampler has already been used, return the existing mapping.
419 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
420 [buffer, offset](const SamplerEntry& entry) {
421 return entry.buffer == buffer && entry.offset == offset;
422 });
423 if (it != used_samplers.end()) {
424 ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
425 it->is_shadow == info.is_shadow);
426 return *it;
427 }
428
429 // Otherwise create a new mapping for this sampler
430 const auto next_index = static_cast<u32>(used_samplers.size());
431 return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
432 *info.is_shadow, *info.is_buffer, false);
433 }
434 if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
435 const std::pair indices = sampler_info->indices;
436 const std::pair offsets = sampler_info->offsets;
437 info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
438
439 // Try to use an already created sampler if it exists
440 const auto it =
441 std::find_if(used_samplers.begin(), used_samplers.end(),
442 [indices, offsets](const SamplerEntry& entry) {
443 return offsets == std::pair{entry.offset, entry.secondary_offset} &&
444 indices == std::pair{entry.buffer, entry.secondary_buffer};
445 });
446 if (it != used_samplers.end()) {
447 ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
448 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
449 return *it;
450 }
451
452 // Otherwise create a new mapping for this sampler
453 const u32 next_index = static_cast<u32>(used_samplers.size());
454 return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
455 *info.is_shadow, *info.is_buffer);
456 }
457 if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
458 const u32 base_offset = sampler_info->base_offset / 4;
459 index_var = GetCustomVariable(sampler_info->bindless_var);
460 info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
461
462 // If this sampler has already been used, return the existing mapping.
463 const auto it = std::find_if(
464 used_samplers.begin(), used_samplers.end(),
465 [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
466 if (it != used_samplers.end()) {
467 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
468 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
469 it->is_indexed);
470 return *it;
471 }
472
473 uses_indexed_samplers = true;
474 // Otherwise create a new mapping for this sampler
475 const auto next_index = static_cast<u32>(used_samplers.size());
476 return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
477 *info.is_shadow, *info.is_buffer, true);
478 }
479 return std::nullopt;
480}
481
482void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
483 u32 dest_elem = 0;
484 for (u32 elem = 0; elem < 4; ++elem) {
485 if (!instr.tex.IsComponentEnabled(elem)) {
486 // Skip disabled components
487 continue;
488 }
489 SetTemporary(bb, dest_elem++, components[elem]);
490 }
491    // After writing the values to temporaries, move them to the real registers
492 for (u32 i = 0; i < dest_elem; ++i) {
493 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
494 }
495}
496
497void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
498 bool ignore_mask) {
499 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
500 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
501
502 u32 dest_elem = 0;
503 for (u32 component = 0; component < 4; ++component) {
504 if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
505 continue;
506 SetTemporary(bb, dest_elem++, components[component]);
507 }
508
509 for (u32 i = 0; i < dest_elem; ++i) {
510 if (i < 2) {
511 // Write the first two swizzle components to gpr0 and gpr0+1
512 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
513 } else {
514 ASSERT(instr.texs.HasTwoDestinations());
515 // Write the rest of the swizzle components to gpr28 and gpr28+1
516 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
517 }
518 }
519}
520
521void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
522 const Node4& components, bool ignore_mask) {
523    // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
524 // float instruction).
525
526 Node4 values;
527 u32 dest_elem = 0;
528 for (u32 component = 0; component < 4; ++component) {
529 if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
530 continue;
531 values[dest_elem++] = components[component];
532 }
533 if (dest_elem == 0)
534 return;
535
536 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
537
538 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
539 if (dest_elem <= 2) {
540 SetRegister(bb, instr.gpr0, first_value);
541 return;
542 }
543
544 SetTemporary(bb, 0, first_value);
545 SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
546
547 SetRegister(bb, instr.gpr0, GetTemporary(0));
548 SetRegister(bb, instr.gpr28, GetTemporary(1));
549}
550
551Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
552 TextureProcessMode process_mode, std::vector<Node> coords,
553 Node array, Node depth_compare, u32 bias_offset,
554 std::vector<Node> aoffi,
555 std::optional<Tegra::Shader::Register> bindless_reg) {
556 const bool is_array = array != nullptr;
557 const bool is_shadow = depth_compare != nullptr;
558 const bool is_bindless = bindless_reg.has_value();
559
560 ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
561 "Illegal texture type");
562
563 SamplerInfo info;
564 info.type = texture_type;
565 info.is_array = is_array;
566 info.is_shadow = is_shadow;
567 info.is_buffer = false;
568
569 Node index_var;
570 const std::optional<SamplerEntry> sampler =
571 is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
572 : GetSampler(instr.sampler, info);
573 if (!sampler) {
574 return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
575 }
576
577 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
578 process_mode == TextureProcessMode::LL ||
579 process_mode == TextureProcessMode::LLA;
580 const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture;
581
582 Node bias;
583 Node lod;
584 switch (process_mode) {
585 case TextureProcessMode::None:
586 break;
587 case TextureProcessMode::LZ:
588 lod = Immediate(0.0f);
589 break;
590 case TextureProcessMode::LB:
591 // If present, lod or bias are always stored in the register indexed by the gpr20 field with
592 // an offset depending on the usage of the other registers.
593 bias = GetRegister(instr.gpr20.Value() + bias_offset);
594 break;
595 case TextureProcessMode::LL:
596 lod = GetRegister(instr.gpr20.Value() + bias_offset);
597 break;
598 default:
599 UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
600 break;
601 }
602
603 Node4 values;
604 for (u32 element = 0; element < values.size(); ++element) {
605 MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
606 lod, {}, element, index_var};
607 values[element] = Operation(opcode, meta, coords);
608 }
609
610 return values;
611}
612
613Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
614 TextureProcessMode process_mode, bool depth_compare, bool is_array,
615 bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
616 const bool lod_bias_enabled{
617 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
618
619 const bool is_bindless = bindless_reg.has_value();
620
621 u64 parameter_register = instr.gpr20.Value();
622 if (is_bindless) {
623 ++parameter_register;
624 }
625
626 const u32 bias_lod_offset = (is_bindless ? 1 : 0);
627 if (lod_bias_enabled) {
628 ++parameter_register;
629 }
630
631 const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
632 lod_bias_enabled, 4, 5);
633 const auto coord_count = std::get<0>(coord_counts);
634    // If enabled, the array index is always stored in the gpr8 field
635 const u64 array_register = instr.gpr8.Value();
636    // The first coordinate index is gpr8, or gpr8 + 1 when arrays are used
637 const u64 coord_register = array_register + (is_array ? 1 : 0);
638
639 std::vector<Node> coords;
640 for (std::size_t i = 0; i < coord_count; ++i) {
641 coords.push_back(GetRegister(coord_register + i));
642 }
643    // For 1D.DC in OpenGL, the 2nd component is ignored.
644 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
645 coords.push_back(Immediate(0.0f));
646 }
647
648 const Node array = is_array ? GetRegister(array_register) : nullptr;
649
650 std::vector<Node> aoffi;
651 if (is_aoffi) {
652 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
653 }
654
655 Node dc;
656 if (depth_compare) {
657 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
658 // or bias are used
659 dc = GetRegister(parameter_register++);
660 }
661
662 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
663 aoffi, bindless_reg);
664}
665
666Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
667 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
668 const bool lod_bias_enabled =
669 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
670
671 const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
672 lod_bias_enabled, 4, 4);
673 const auto coord_count = std::get<0>(coord_counts);
674
675    // If enabled, the array index is always stored in the gpr8 field
676 const u64 array_register = instr.gpr8.Value();
677    // The first coordinate index is stored in gpr8, or (gpr8 + 1) when arrays are used
678 const u64 coord_register = array_register + (is_array ? 1 : 0);
679 const u64 last_coord_register =
680 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
681 ? static_cast<u64>(instr.gpr20.Value())
682 : coord_register + 1;
683 const u32 bias_offset = coord_count > 2 ? 1 : 0;
684
685 std::vector<Node> coords;
686 for (std::size_t i = 0; i < coord_count; ++i) {
687 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
688 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
689 }
690
691 const Node array = is_array ? GetRegister(array_register) : nullptr;
692
693 Node dc;
694 if (depth_compare) {
695 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
696 // or bias are used
697 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
698 dc = GetRegister(depth_register);
699 }
700
701 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
702 {});
703}
704
705Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
706 bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
707 ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
708
709 const std::size_t coord_count = GetCoordCount(texture_type);
710
711    // If enabled, the array index is always stored in the gpr8 field
712 const u64 array_register = instr.gpr8.Value();
713    // The first coordinate index is gpr8, or gpr8 + 1 when arrays are used
714 const u64 coord_register = array_register + (is_array ? 1 : 0);
715
716 std::vector<Node> coords;
717 for (std::size_t i = 0; i < coord_count; ++i) {
718 coords.push_back(GetRegister(coord_register + i));
719 }
720
721 u64 parameter_register = instr.gpr20.Value();
722
723 SamplerInfo info;
724 info.type = texture_type;
725 info.is_array = is_array;
726 info.is_shadow = depth_compare;
727
728 Node index_var;
729 const std::optional<SamplerEntry> sampler =
730 is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
731 : GetSampler(instr.sampler, info);
732 Node4 values;
733 if (!sampler) {
734 for (u32 element = 0; element < values.size(); ++element) {
735 values[element] = Immediate(0);
736 }
737 return values;
738 }
739
740 std::vector<Node> aoffi, ptp;
741 if (is_aoffi) {
742 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
743 } else if (is_ptp) {
744 ptp = GetPtpCoordinates(
745 {GetRegister(parameter_register++), GetRegister(parameter_register++)});
746 }
747
748 Node dc;
749 if (depth_compare) {
750 dc = GetRegister(parameter_register++);
751 }
752
753 const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
754 : Immediate(static_cast<u32>(instr.tld4.component));
755
756 for (u32 element = 0; element < values.size(); ++element) {
757 auto coords_copy = coords;
758 MetaTexture meta{
759 *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
760 index_var};
761 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
762 }
763
764 return values;
765}
766
767Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
768 const auto texture_type{instr.tld.texture_type};
769 const bool is_array{instr.tld.is_array != 0};
770 const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
771 const std::size_t coord_count{GetCoordCount(texture_type)};
772
773 u64 gpr8_cursor{instr.gpr8.Value()};
774 const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
775
776 std::vector<Node> coords;
777 coords.reserve(coord_count);
778 for (std::size_t i = 0; i < coord_count; ++i) {
779 coords.push_back(GetRegister(gpr8_cursor++));
780 }
781
782 u64 gpr20_cursor{instr.gpr20.Value()};
783 // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
784 const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
785 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
786 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
787
788 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
789
790 Node4 values;
791 for (u32 element = 0; element < values.size(); ++element) {
792 auto coords_copy = coords;
793 MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
794 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
795 }
796
797 return values;
798}
799
800Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
801 SamplerInfo info;
802 info.type = texture_type;
803 info.is_array = is_array;
804 info.is_shadow = false;
805 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
806
807 const std::size_t type_coord_count = GetCoordCount(texture_type);
808 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
809 const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
810
811    // If enabled, the array index is always stored in the gpr8 field
812 const u64 array_register = instr.gpr8.Value();
813    // If this is an array texture, the coordinates start at gpr20
814 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
815
816 const u64 last_coord_register =
817 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
818 ? static_cast<u64>(instr.gpr20.Value())
819 : coord_register + 1;
820
821 std::vector<Node> coords;
822 for (std::size_t i = 0; i < type_coord_count; ++i) {
823 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
824 coords.push_back(
825 GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
826 }
827
828 const Node array = is_array ? GetRegister(array_register) : nullptr;
829    // When lod is used, it is always stored in gpr20
830 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
831
832 std::vector<Node> aoffi;
833 if (aoffi_enabled) {
834 aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
835 }
836
837 Node4 values;
838 for (u32 element = 0; element < values.size(); ++element) {
839 auto coords_copy = coords;
840 MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
841 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
842 }
843 return values;
844}
845
846std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
847 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
848 std::size_t max_coords, std::size_t max_inputs) {
849 const std::size_t coord_count = GetCoordCount(texture_type);
850
851 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
852 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
853 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
854 UNIMPLEMENTED_MSG("Unsupported Texture operation");
855 total_coord_count = std::min(total_coord_count, max_coords);
856 }
857    // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later.
858 total_coord_count +=
859 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
860
861 return {coord_count, total_coord_count};
862}
863
864std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
865 bool is_tld4) {
866 const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
867 const u32 size = is_tld4 ? 6 : 4;
868 const s32 wrap_value = is_tld4 ? 32 : 8;
869 const s32 diff_value = is_tld4 ? 64 : 16;
870 const u32 mask = (1U << size) - 1;
871
872 std::vector<Node> aoffi;
873 aoffi.reserve(coord_count);
874
875 const auto aoffi_immediate{
876 TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
877 if (!aoffi_immediate) {
878 // Variable access, not supported on AMD.
879 LOG_WARNING(HW_GPU,
880 "AOFFI constant folding failed, some hardware might have graphical issues");
881 for (std::size_t coord = 0; coord < coord_count; ++coord) {
882 const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
883 const Node condition =
884 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
885 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
886 aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
887 }
888 return aoffi;
889 }
890
891 for (std::size_t coord = 0; coord < coord_count; ++coord) {
892 s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
893 if (value >= wrap_value) {
894 value -= diff_value;
895 }
896 aoffi.push_back(Immediate(value));
897 }
898 return aoffi;
899}
900
901std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
902 static constexpr u32 num_entries = 8;
903
904 std::vector<Node> ptp;
905 ptp.reserve(num_entries);
906
907 const auto global_size = static_cast<s64>(global_code.size());
908 const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
909 const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
910 if (!low || !high) {
911 for (u32 entry = 0; entry < num_entries; ++entry) {
912 const u32 reg = entry / 4;
913 const u32 offset = entry % 4;
914 const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
915 const Node condition =
916 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
917 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
918 ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
919 }
920 return ptp;
921 }
922
923 const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
924 for (u32 entry = 0; entry < num_entries; ++entry) {
925 s32 value = (immediate >> (entry * 8)) & 0b111111;
926 if (value >= 32) {
927 value -= 64;
928 }
929 ptp.push_back(Immediate(value));
930 }
931
932 return ptp;
933}
934
935} // namespace VideoCommon::Shader
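The GetAoffiCoordinates helper above constant-folds the packed texture offsets when it can: ordinary AOFFI stores each offset as a 4-bit signed field at bit positions 0/4/8, while TLD4 uses 6-bit fields at 0/8/16, and values at or above half the range wrap negative. A hedged standalone sketch of the immediate (constant-folded) path, with a hypothetical helper name:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hedged sketch of the constant-folded AOFFI decoding: each coordinate offset is a
// small signed bitfield inside one register. coord_count is at most 3.
std::vector<int> DecodeAoffi(uint32_t packed, std::size_t coord_count, bool is_tld4) {
    const uint32_t positions[3] = {0u, is_tld4 ? 8u : 4u, is_tld4 ? 16u : 8u};
    const uint32_t size = is_tld4 ? 6 : 4;
    const int wrap_value = is_tld4 ? 32 : 8;   // First value that is actually negative
    const int diff_value = is_tld4 ? 64 : 16;  // Full range of the signed field
    const uint32_t mask = (1u << size) - 1;

    std::vector<int> offsets;
    offsets.reserve(coord_count);
    for (std::size_t coord = 0; coord < coord_count; ++coord) {
        int value = static_cast<int>((packed >> positions[coord]) & mask);
        if (value >= wrap_value) {
            value -= diff_value; // Sign-extend the small field
        }
        offsets.push_back(value);
    }
    return offsets;
}

int main() {
    // 2D offset (-1, 2) packed in the non-TLD4 layout: 0xF in bits 0-3, 0x2 in bits 4-7
    for (const int offset : DecodeAoffi(0x2f, 2, false)) {
        std::printf("%d ", offset);
    }
    std::printf("\n"); // prints "-1 2"
}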
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
deleted file mode 100644
index 1c0957277..000000000
--- a/src/video_core/shader/decode/video.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using std::move;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::VideoType;
18using Tegra::Shader::VmadShr;
19using Tegra::Shader::VmnmxOperation;
20using Tegra::Shader::VmnmxType;
21
22u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
23 const Instruction instr = {program_code[pc]};
24 const auto opcode = OpCode::Decode(instr);
25
26 if (opcode->get().GetId() == OpCode::Id::VMNMX) {
27 DecodeVMNMX(bb, instr);
28 return pc;
29 }
30
31 const Node op_a =
32 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
33 instr.video.type_a, instr.video.byte_height_a);
34 const Node op_b = [this, instr] {
35 if (instr.video.use_register_b) {
36 return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
37 instr.video.signed_b, instr.video.type_b,
38 instr.video.byte_height_b);
39 }
40 if (instr.video.signed_b) {
41 const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
42 return Immediate(static_cast<u32>(imm));
43 } else {
44 return Immediate(instr.alu.GetImm20_16());
45 }
46 }();
47
48 switch (opcode->get().GetId()) {
49 case OpCode::Id::VMAD: {
50 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
51 const Node op_c = GetRegister(instr.gpr39);
52
53 Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
54 value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);
55
56 if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
57 const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
58 value =
59 SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
60 }
61
62 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
63 SetRegister(bb, instr.gpr0, value);
64 break;
65 }
66 case OpCode::Id::VSETP: {
67 // We can't use the constant predicate as destination.
68 ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
69
70 const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
71 const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
72 const Node second_pred = GetPredicate(instr.vsetp.pred39, false);
73
74 const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);
75
76 // Set the primary predicate to the result of Predicate OP SecondPredicate
77 SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));
78
79 if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
80 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
81 // if enabled
82 const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
83 SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
84 }
85 break;
86 }
87 default:
88 UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
89 }
90
91 return pc;
92}
93
94Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
95 u64 byte_height) {
96 if (!is_chunk) {
97 return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
98 }
99
100 switch (type) {
101 case VideoType::Size16_Low:
102 return BitfieldExtract(op, 0, 16);
103 case VideoType::Size16_High:
104 return BitfieldExtract(op, 16, 16);
105 case VideoType::Size32:
106 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
107 // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
108 UNIMPLEMENTED();
109 return Immediate(0);
110 case VideoType::Invalid:
111 UNREACHABLE_MSG("Invalid instruction encoding");
112 return Immediate(0);
113 default:
114 UNREACHABLE();
115 return Immediate(0);
116 }
117}
118
119void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
120 UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
121 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
122 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
123 UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
124 UNIMPLEMENTED_IF(instr.vmnmx.sat);
125 UNIMPLEMENTED_IF(instr.generates_cc);
126
127 Node op_a = GetRegister(instr.gpr8);
128 Node op_b = GetRegister(instr.gpr20);
129 Node op_c = GetRegister(instr.gpr39);
130
131 const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
132 const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
133
134 const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
135 Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
136
137 switch (instr.vmnmx.operation) {
138 case VmnmxOperation::Mrg_16H:
139 value = BitfieldInsert(move(op_c), move(value), 16, 16);
140 break;
141 case VmnmxOperation::Mrg_16L:
142 value = BitfieldInsert(move(op_c), move(value), 0, 16);
143 break;
144 case VmnmxOperation::Mrg_8B0:
145 value = BitfieldInsert(move(op_c), move(value), 0, 8);
146 break;
147 case VmnmxOperation::Mrg_8B2:
148 value = BitfieldInsert(move(op_c), move(value), 16, 8);
149 break;
150 case VmnmxOperation::Acc:
151 value = Operation(OperationCode::IAdd, move(value), move(op_c));
152 break;
153 case VmnmxOperation::Min:
154 value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
155 break;
156 case VmnmxOperation::Max:
157 value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
158 break;
159 case VmnmxOperation::Nop:
160 break;
161 default:
162 UNREACHABLE();
163 break;
164 }
165
166 SetRegister(bb, instr.gpr0, move(value));
167}
168
169} // namespace VideoCommon::Shader
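GetVideoOperand above selects the sub-register lane that VMAD/VSETP operate on: either one byte chosen by byte_height, or one 16-bit half of the register. A minimal sketch of that selection with hypothetical parameter names; the Size32 case aborts in the deleted code, and sign handling happens later through SignedOperation, so both are omitted here:

#include <cstdint>
#include <cstdio>

// Hedged sketch of the video operand selection: when is_chunk is false a single
// byte lane is extracted (selected by byte_height); otherwise one 16-bit half is used.
uint32_t ExtractVideoOperand(uint32_t reg, bool is_chunk, bool high_half, uint32_t byte_height) {
    if (!is_chunk) {
        return (reg >> (byte_height * 8)) & 0xffu; // One byte lane
    }
    return high_half ? (reg >> 16) : (reg & 0xffffu); // Size16_High / Size16_Low
}

int main() {
    std::printf("0x%02x 0x%04x\n",
                ExtractVideoOperand(0xdeadbeef, false, false, 2), // byte 2 -> 0xad
                ExtractVideoOperand(0xdeadbeef, true, true, 0));  // high half -> 0xdead
}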
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
deleted file mode 100644
index 37433d783..000000000
--- a/src/video_core/shader/decode/warp.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::ShuffleOperation;
17using Tegra::Shader::VoteOperation;
18
19namespace {
20
21OperationCode GetOperationCode(VoteOperation vote_op) {
22 switch (vote_op) {
23 case VoteOperation::All:
24 return OperationCode::VoteAll;
25 case VoteOperation::Any:
26 return OperationCode::VoteAny;
27 case VoteOperation::Eq:
28 return OperationCode::VoteEqual;
29 default:
30 UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
31 return OperationCode::VoteAll;
32 }
33}
34
35} // Anonymous namespace
36
37u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
38 const Instruction instr = {program_code[pc]};
39 const auto opcode = OpCode::Decode(instr);
40
41 // Signal the backend that this shader uses warp instructions.
42 uses_warps = true;
43
44 switch (opcode->get().GetId()) {
45 case OpCode::Id::VOTE: {
46 const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
47 const Node active = Operation(OperationCode::BallotThread, value);
48 const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
49 SetRegister(bb, instr.gpr0, active);
50 SetPredicate(bb, instr.vote.dest_pred, vote);
51 break;
52 }
53 case OpCode::Id::SHFL: {
54 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
55 : GetRegister(instr.gpr39);
56 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
57 : GetRegister(instr.gpr20);
58
59 Node thread_id = Operation(OperationCode::ThreadId);
60 Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
61 Node seg_mask = BitfieldExtract(mask, 8, 16);
62
63 Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
64 Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
65 Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
66 Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
67
68 Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
69 switch (instr.shfl.operation) {
70 case ShuffleOperation::Idx:
71 return Operation(OperationCode::IBitwiseOr,
72 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
73 min_thread_id);
74 case ShuffleOperation::Down:
75 return Operation(OperationCode::IAdd, thread_id, index);
76 case ShuffleOperation::Up:
77 return Operation(OperationCode::IAdd, thread_id,
78 Operation(OperationCode::INegate, index));
79 case ShuffleOperation::Bfly:
80 return Operation(OperationCode::IBitwiseXor, thread_id, index);
81 }
82 UNREACHABLE();
83 return Immediate(0U);
84 }();
85
86 Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
87 if (instr.shfl.operation == ShuffleOperation::Up) {
88 return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
89 } else {
90 return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
91 }
92 }();
93
94 SetPredicate(bb, instr.shfl.pred48, in_bounds);
95 SetRegister(
96 bb, instr.gpr0,
97 Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
98 break;
99 }
100 case OpCode::Id::FSWZADD: {
101 UNIMPLEMENTED_IF(instr.fswzadd.ndv);
102
103 Node op_a = GetRegister(instr.gpr8);
104 Node op_b = GetRegister(instr.gpr20);
105 Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
106 SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
107 break;
108 }
109 default:
110 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
111 break;
112 }
113
114 return pc;
115}
116
117} // namespace VideoCommon::Shader
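For context on the SHFL lane arithmetic being modeled above, here is a rough standalone sketch (not part of the removed sources; Shuffle, ShflResult and ComputeShfl are hypothetical names), assuming the usual 5-bit lane clamp and a 16-bit segment mask starting at bit 8 of the mask operand:

#include <cstdint>

enum class Shuffle { Idx, Up, Down, Bfly };

struct ShflResult {
    int32_t src_thread_id;
    bool in_bounds;
};

ShflResult ComputeShfl(Shuffle op, uint32_t thread_id, uint32_t index, uint32_t mask) {
    const uint32_t clamp = mask & 0x1Fu;              // lane clamp (low 5 bits of the mask operand)
    const uint32_t seg_mask = (mask >> 8) & 0xFFFFu;  // segment mask (16 bits starting at bit 8)
    const uint32_t min_id = thread_id & seg_mask;          // first lane of this thread's segment
    const uint32_t max_id = min_id | (clamp & ~seg_mask);  // last addressable lane

    int32_t src = 0;
    switch (op) {
    case Shuffle::Idx:
        src = static_cast<int32_t>((index & ~seg_mask) | min_id);
        break;
    case Shuffle::Down:
        src = static_cast<int32_t>(thread_id + index);
        break;
    case Shuffle::Up:
        src = static_cast<int32_t>(thread_id) - static_cast<int32_t>(index);
        break;
    case Shuffle::Bfly:
        src = static_cast<int32_t>(thread_id ^ index);
        break;
    }
    // Up compares against the segment's first lane, everything else against the last one,
    // mirroring the LogicalIGreaterEqual/LogicalILessEqual predicates above.
    const bool in_bounds = (op == Shuffle::Up) ? (src >= static_cast<int32_t>(min_id))
                                               : (src <= static_cast<int32_t>(max_id));
    return {src, in_bounds};
}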
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
deleted file mode 100644
index 233b8fa42..000000000
--- a/src/video_core/shader/decode/xmad.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::PredCondition;
16
17u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 UNIMPLEMENTED_IF(instr.xmad.sign_a);
22 UNIMPLEMENTED_IF(instr.xmad.sign_b);
23 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
24 "Condition codes generation in XMAD is not implemented");
25
26 Node op_a = GetRegister(instr.gpr8);
27
28 // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
29 UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
30 const bool is_signed_a = instr.xmad.sign_a == 1;
31 const bool is_signed_b = instr.xmad.sign_b == 1;
32 const bool is_signed_c = is_signed_a;
33
34 auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
35 op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
36 switch (opcode->get().GetId()) {
37 case OpCode::Id::XMAD_CR:
38 return {instr.xmad.merge_56,
39 instr.xmad.product_shift_left_second,
40 instr.xmad.high_b,
41 instr.xmad.mode_cbf,
42 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
43 GetRegister(instr.gpr39)};
44 case OpCode::Id::XMAD_RR:
45 return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
46 instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
47 case OpCode::Id::XMAD_RC:
48 return {false,
49 false,
50 instr.xmad.high_b,
51 instr.xmad.mode_cbf,
52 GetRegister(instr.gpr39),
53 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
54 case OpCode::Id::XMAD_IMM:
55 return {instr.xmad.merge_37,
56 instr.xmad.product_shift_left,
57 false,
58 instr.xmad.mode,
59 Immediate(static_cast<u32>(instr.xmad.imm20_16)),
60 GetRegister(instr.gpr39)};
61 default:
62 UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
63 return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
64 }
65 }();
66
67 op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
68 instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
69
70 const Node original_b = op_b_binding;
71 const Node op_b =
72 SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
73 is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
74
75    // sign_a and sign_b were already checked to be equal above, so either one can be used here.
76 Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
77 if (is_psl) {
78 product =
79 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
80 }
81 SetTemporary(bb, 0, product);
82 product = GetTemporary(0);
83
84 Node original_c = op_c;
85    const Tegra::Shader::XmadMode set_mode = mode; // Workaround for a clang compile error
86 op_c = [&] {
87 switch (set_mode) {
88 case Tegra::Shader::XmadMode::None:
89 return original_c;
90 case Tegra::Shader::XmadMode::CLo:
91 return BitfieldExtract(std::move(original_c), 0, 16);
92 case Tegra::Shader::XmadMode::CHi:
93 return BitfieldExtract(std::move(original_c), 16, 16);
94 case Tegra::Shader::XmadMode::CBcc: {
95 Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
96 original_b, Immediate(16));
97 return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
98 std::move(shifted_b));
99 }
100 case Tegra::Shader::XmadMode::CSfu: {
101 const Node comp_a =
102 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
103 const Node comp_b =
104 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
105 const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
106
107 const Node comp_minus_a = GetPredicateComparisonInteger(
108 PredCondition::NE, is_signed_a,
109 SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
110 Immediate(0x80000000)),
111 Immediate(0));
112 const Node comp_minus_b = GetPredicateComparisonInteger(
113 PredCondition::NE, is_signed_b,
114 SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
115 Immediate(0x80000000)),
116 Immediate(0));
117
118 Node new_c = Operation(
119 OperationCode::Select, comp_minus_a,
120 SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
121 original_c);
122 new_c = Operation(
123 OperationCode::Select, comp_minus_b,
124 SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
125 std::move(new_c));
126
127 return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
128 }
129 default:
130 UNREACHABLE();
131 return Immediate(0);
132 }
133 }();
134
135 SetTemporary(bb, 1, op_c);
136 op_c = GetTemporary(1);
137
138    // TODO(Rodrigo): Use an appropriate sign for this operation
139 Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
140 SetTemporary(bb, 2, sum);
141 sum = GetTemporary(2);
142 if (is_merge) {
143 const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
144 Immediate(0), Immediate(16));
145 const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
146 Immediate(16));
147 sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
148 }
149
150 SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
151 SetRegister(bb, instr.gpr0, std::move(sum));
152
153 return pc;
154}
155
156} // namespace VideoCommon::Shader
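Stripped of its merge and CBcc/CSfu sub-modes, the operation being decoded above is a 16x16-bit multiply plus a 32-bit addend. A rough standalone sketch (not part of the removed sources; XmadCore is a hypothetical name):

#include <cstdint>

// Select a 16-bit half of each source, multiply, optionally shift the product left by 16
// (the PSL flag), then add the 32-bit third operand.
uint32_t XmadCore(uint32_t a, bool high_a, uint32_t b, bool high_b, uint32_t c, bool psl) {
    const uint32_t a16 = (high_a ? a >> 16 : a) & 0xFFFFu;
    const uint32_t b16 = (high_b ? b >> 16 : b) & 0xFFFFu;
    uint32_t product = a16 * b16;
    if (psl) {
        product <<= 16;
    }
    return product + c;
}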
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp
deleted file mode 100644
index 2647865d4..000000000
--- a/src/video_core/shader/expr.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <variant>
7
8#include "video_core/shader/expr.h"
9
10namespace VideoCommon::Shader {
11namespace {
12bool ExprIsBoolean(const Expr& expr) {
13 return std::holds_alternative<ExprBoolean>(*expr);
14}
15
16bool ExprBooleanGet(const Expr& expr) {
17 return std::get_if<ExprBoolean>(expr.get())->value;
18}
19} // Anonymous namespace
20
21bool ExprAnd::operator==(const ExprAnd& b) const {
22 return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
23}
24
25bool ExprAnd::operator!=(const ExprAnd& b) const {
26 return !operator==(b);
27}
28
29bool ExprOr::operator==(const ExprOr& b) const {
30 return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
31}
32
33bool ExprOr::operator!=(const ExprOr& b) const {
34 return !operator==(b);
35}
36
37bool ExprNot::operator==(const ExprNot& b) const {
38 return *operand1 == *b.operand1;
39}
40
41bool ExprNot::operator!=(const ExprNot& b) const {
42 return !operator==(b);
43}
44
45Expr MakeExprNot(Expr first) {
46 if (std::holds_alternative<ExprNot>(*first)) {
47 return std::get_if<ExprNot>(first.get())->operand1;
48 }
49 return MakeExpr<ExprNot>(std::move(first));
50}
51
52Expr MakeExprAnd(Expr first, Expr second) {
53 if (ExprIsBoolean(first)) {
54 return ExprBooleanGet(first) ? second : first;
55 }
56 if (ExprIsBoolean(second)) {
57 return ExprBooleanGet(second) ? first : second;
58 }
59 return MakeExpr<ExprAnd>(std::move(first), std::move(second));
60}
61
62Expr MakeExprOr(Expr first, Expr second) {
63 if (ExprIsBoolean(first)) {
64 return ExprBooleanGet(first) ? first : second;
65 }
66 if (ExprIsBoolean(second)) {
67 return ExprBooleanGet(second) ? second : first;
68 }
69 return MakeExpr<ExprOr>(std::move(first), std::move(second));
70}
71
72bool ExprAreEqual(const Expr& first, const Expr& second) {
73 return (*first) == (*second);
74}
75
76bool ExprAreOpposite(const Expr& first, const Expr& second) {
77 if (std::holds_alternative<ExprNot>(*first)) {
78 return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second);
79 }
80 if (std::holds_alternative<ExprNot>(*second)) {
81 return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first);
82 }
83 return false;
84}
85
86bool ExprIsTrue(const Expr& first) {
87 if (ExprIsBoolean(first)) {
88 return ExprBooleanGet(first);
89 }
90 return false;
91}
92
93} // namespace VideoCommon::Shader
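The constant folding performed by MakeExprAnd/MakeExprOr above can be summarized with the following rough sketch on a toy expression type (not part of the removed sources; BoolLit, Var and TryFoldAnd are hypothetical names):

#include <memory>
#include <optional>
#include <string>
#include <variant>

struct BoolLit { bool value; };
struct Var { std::string name; };
using Expr = std::variant<BoolLit, Var>;
using ExprPtr = std::shared_ptr<Expr>;

// Returns the folded operand when either side is a boolean literal; std::nullopt means a
// real And node has to be built (the MakeExpr<ExprAnd> path above).
std::optional<ExprPtr> TryFoldAnd(ExprPtr lhs, ExprPtr rhs) {
    if (const auto* lit = std::get_if<BoolLit>(lhs.get())) {
        return lit->value ? rhs : lhs; // true && x == x, false && x == false
    }
    if (const auto* lit = std::get_if<BoolLit>(rhs.get())) {
        return lit->value ? lhs : rhs; // x && true == x, x && false == false
    }
    return std::nullopt;
}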
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
deleted file mode 100644
index cda284c72..000000000
--- a/src/video_core/shader/expr.h
+++ /dev/null
@@ -1,156 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <variant>
9
10#include "video_core/engines/shader_bytecode.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::ConditionCode;
15using Tegra::Shader::Pred;
16
17class ExprAnd;
18class ExprBoolean;
19class ExprCondCode;
20class ExprGprEqual;
21class ExprNot;
22class ExprOr;
23class ExprPredicate;
24class ExprVar;
25
26using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
27 ExprBoolean, ExprGprEqual>;
28using Expr = std::shared_ptr<ExprData>;
29
30class ExprAnd final {
31public:
32 explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
33
34 bool operator==(const ExprAnd& b) const;
35 bool operator!=(const ExprAnd& b) const;
36
37 Expr operand1;
38 Expr operand2;
39};
40
41class ExprOr final {
42public:
43 explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
44
45 bool operator==(const ExprOr& b) const;
46 bool operator!=(const ExprOr& b) const;
47
48 Expr operand1;
49 Expr operand2;
50};
51
52class ExprNot final {
53public:
54 explicit ExprNot(Expr a) : operand1{std::move(a)} {}
55
56 bool operator==(const ExprNot& b) const;
57 bool operator!=(const ExprNot& b) const;
58
59 Expr operand1;
60};
61
62class ExprVar final {
63public:
64 explicit ExprVar(u32 index) : var_index{index} {}
65
66 bool operator==(const ExprVar& b) const {
67 return var_index == b.var_index;
68 }
69
70 bool operator!=(const ExprVar& b) const {
71 return !operator==(b);
72 }
73
74 u32 var_index;
75};
76
77class ExprPredicate final {
78public:
79 explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
80
81 bool operator==(const ExprPredicate& b) const {
82 return predicate == b.predicate;
83 }
84
85 bool operator!=(const ExprPredicate& b) const {
86 return !operator==(b);
87 }
88
89 u32 predicate;
90};
91
92class ExprCondCode final {
93public:
94 explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
95
96 bool operator==(const ExprCondCode& b) const {
97 return cc == b.cc;
98 }
99
100 bool operator!=(const ExprCondCode& b) const {
101 return !operator==(b);
102 }
103
104 ConditionCode cc;
105};
106
107class ExprBoolean final {
108public:
109 explicit ExprBoolean(bool val) : value{val} {}
110
111 bool operator==(const ExprBoolean& b) const {
112 return value == b.value;
113 }
114
115 bool operator!=(const ExprBoolean& b) const {
116 return !operator==(b);
117 }
118
119 bool value;
120};
121
122class ExprGprEqual final {
123public:
124 explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
125
126 bool operator==(const ExprGprEqual& b) const {
127 return gpr == b.gpr && value == b.value;
128 }
129
130 bool operator!=(const ExprGprEqual& b) const {
131 return !operator==(b);
132 }
133
134 u32 gpr;
135 u32 value;
136};
137
138template <typename T, typename... Args>
139Expr MakeExpr(Args&&... args) {
140 static_assert(std::is_convertible_v<T, ExprData>);
141 return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
142}
143
144bool ExprAreEqual(const Expr& first, const Expr& second);
145
146bool ExprAreOpposite(const Expr& first, const Expr& second);
147
148Expr MakeExprNot(Expr first);
149
150Expr MakeExprAnd(Expr first, Expr second);
151
152Expr MakeExprOr(Expr first, Expr second);
153
154bool ExprIsTrue(const Expr& first);
155
156} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
deleted file mode 100644
index e18ccba8e..000000000
--- a/src/video_core/shader/memory_util.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7
8#include <boost/container_hash/hash.hpp>
9
10#include "common/common_types.h"
11#include "core/core.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/memory_manager.h"
14#include "video_core/shader/memory_util.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace VideoCommon::Shader {
18
19GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
20 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
21 const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
22 return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
23}
24
25bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
26 // Sched instructions appear once every 4 instructions.
27 constexpr std::size_t SchedPeriod = 4;
28 const std::size_t absolute_offset = offset - main_offset;
29 return (absolute_offset % SchedPeriod) == 0;
30}
31
32std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
33 // This is the encoded version of BRA that jumps to itself. All Nvidia
34 // shaders end with one.
35 static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
36 static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
37
38 const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
39 std::size_t offset = start_offset;
40 while (offset < program.size()) {
41 const u64 instruction = program[offset];
42 if (!IsSchedInstruction(offset, start_offset)) {
43 if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
44 // End on Maxwell's "nop" instruction
45 break;
46 }
47 if (instruction == 0) {
48 break;
49 }
50 }
51 ++offset;
52 }
53 // The last instruction is included in the program size
54 return std::min(offset + 1, program.size());
55}
56
57ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
58 const u8* host_ptr, bool is_compute) {
59 ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
60 ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });
61 memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
62 code.resize(CalculateProgramSize(code, is_compute));
63 return code;
64}
65
66u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
67 const ProgramCode& code_b) {
68 size_t unique_identifier = boost::hash_value(code);
69 if (is_a) {
70 // VertexA programs include two programs
71 boost::hash_combine(unique_identifier, boost::hash_value(code_b));
72 }
73 return static_cast<u64>(unique_identifier);
74}
75
76} // namespace VideoCommon::Shader
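The scan in CalculateProgramSize above can be summarized with this rough standalone sketch (not part of the removed sources; ScanProgramSize is a hypothetical name): sched words are skipped, and the first self-jumping BRA or zero word found outside a sched slot terminates the program.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

std::size_t ScanProgramSize(const std::vector<uint64_t>& program, std::size_t start_offset) {
    constexpr uint64_t SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; // BRA to itself
    constexpr uint64_t MASK = 0xFFFFFFFFFF7FFFFFULL;
    constexpr std::size_t SCHED_PERIOD = 4; // one sched word per bundle of four slots

    std::size_t offset = start_offset;
    while (offset < program.size()) {
        const bool is_sched = ((offset - start_offset) % SCHED_PERIOD) == 0;
        const uint64_t instruction = program[offset];
        if (!is_sched && ((instruction & MASK) == SELF_JUMPING_BRANCH || instruction == 0)) {
            break; // terminator found
        }
        ++offset;
    }
    return std::min(offset + 1, program.size()); // the terminator is counted in the size
}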
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
deleted file mode 100644
index 4624d38e6..000000000
--- a/src/video_core/shader/memory_util.h
+++ /dev/null
@@ -1,43 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13
14namespace Tegra {
15class MemoryManager;
16}
17
18namespace VideoCommon::Shader {
19
20using ProgramCode = std::vector<u64>;
21
22constexpr u32 STAGE_MAIN_OFFSET = 10;
23constexpr u32 KERNEL_MAIN_OFFSET = 0;
24
25/// Gets the address for the specified shader stage program
26GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
27 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
28
29/// Checks whether the instruction at the given offset is a scheduler (sched) instruction
30bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
31
32/// Calculates the size of a program stream
33std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
34
35/// Gets the shader program code from memory for the specified address
36ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
37 const u8* host_ptr, bool is_compute);
38
39/// Hashes one (or two) program streams
40u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
41 const ProgramCode& code_b = {});
42
43} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
deleted file mode 100644
index b54d33763..000000000
--- a/src/video_core/shader/node.h
+++ /dev/null
@@ -1,701 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <optional>
11#include <string>
12#include <tuple>
13#include <utility>
14#include <variant>
15#include <vector>
16
17#include "common/common_types.h"
18#include "video_core/engines/shader_bytecode.h"
19
20namespace VideoCommon::Shader {
21
22enum class OperationCode {
23 Assign, /// (float& dest, float src) -> void
24
25 Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
26
27 FAdd, /// (MetaArithmetic, float a, float b) -> float
28 FMul, /// (MetaArithmetic, float a, float b) -> float
29 FDiv, /// (MetaArithmetic, float a, float b) -> float
30 FFma, /// (MetaArithmetic, float a, float b, float c) -> float
31 FNegate, /// (MetaArithmetic, float a) -> float
32 FAbsolute, /// (MetaArithmetic, float a) -> float
33 FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
34 FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
35 FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
36 FMin, /// (MetaArithmetic, float a, float b) -> float
37 FMax, /// (MetaArithmetic, float a, float b) -> float
38 FCos, /// (MetaArithmetic, float a) -> float
39 FSin, /// (MetaArithmetic, float a) -> float
40 FExp2, /// (MetaArithmetic, float a) -> float
41 FLog2, /// (MetaArithmetic, float a) -> float
42 FInverseSqrt, /// (MetaArithmetic, float a) -> float
43 FSqrt, /// (MetaArithmetic, float a) -> float
44 FRoundEven, /// (MetaArithmetic, float a) -> float
45 FFloor, /// (MetaArithmetic, float a) -> float
46 FCeil, /// (MetaArithmetic, float a) -> float
47 FTrunc, /// (MetaArithmetic, float a) -> float
48 FCastInteger, /// (MetaArithmetic, int a) -> float
49 FCastUInteger, /// (MetaArithmetic, uint a) -> float
50 FSwizzleAdd, /// (float a, float b, uint mask) -> float
51
52 IAdd, /// (MetaArithmetic, int a, int b) -> int
53 IMul, /// (MetaArithmetic, int a, int b) -> int
54 IDiv, /// (MetaArithmetic, int a, int b) -> int
55 INegate, /// (MetaArithmetic, int a) -> int
56 IAbsolute, /// (MetaArithmetic, int a) -> int
57 IMin, /// (MetaArithmetic, int a, int b) -> int
58 IMax, /// (MetaArithmetic, int a, int b) -> int
59 ICastFloat, /// (MetaArithmetic, float a) -> int
60 ICastUnsigned, /// (MetaArithmetic, uint a) -> int
61 ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int
62 ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int
63 IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
64 IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int
65 IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int
66 IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int
67 IBitwiseNot, /// (MetaArithmetic, int a) -> int
68 IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
69    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int bits) -> int
70 IBitCount, /// (MetaArithmetic, int) -> int
71 IBitMSB, /// (MetaArithmetic, int) -> int
72
73 UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
74 UMul, /// (MetaArithmetic, uint a, uint b) -> uint
75 UDiv, /// (MetaArithmetic, uint a, uint b) -> uint
76 UMin, /// (MetaArithmetic, uint a, uint b) -> uint
77 UMax, /// (MetaArithmetic, uint a, uint b) -> uint
78 UCastFloat, /// (MetaArithmetic, float a) -> uint
79 UCastSigned, /// (MetaArithmetic, int a) -> uint
80 ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint
81 ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
82 UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
83 UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint
84 UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint
85 UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint
86 UBitwiseNot, /// (MetaArithmetic, uint a) -> uint
87 UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
88    UBitfieldExtract,      /// (MetaArithmetic, uint value, int offset, int bits) -> uint
89 UBitCount, /// (MetaArithmetic, uint) -> uint
90 UBitMSB, /// (MetaArithmetic, uint) -> uint
91
92 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
93 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
94 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
95 HAbsolute, /// (f16vec2 a) -> f16vec2
96 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
97 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
98 HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
99 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
100 HMergeF32, /// (f16vec2 src) -> float
101 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
102 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
103 HPack2, /// (float a, float b) -> f16vec2
104
105 LogicalAssign, /// (bool& dst, bool src) -> void
106 LogicalAnd, /// (bool a, bool b) -> bool
107 LogicalOr, /// (bool a, bool b) -> bool
108 LogicalXor, /// (bool a, bool b) -> bool
109 LogicalNegate, /// (bool a) -> bool
110 LogicalPick2, /// (bool2 pair, uint index) -> bool
111 LogicalAnd2, /// (bool2 a) -> bool
112
113 LogicalFOrdLessThan, /// (float a, float b) -> bool
114 LogicalFOrdEqual, /// (float a, float b) -> bool
115 LogicalFOrdLessEqual, /// (float a, float b) -> bool
116 LogicalFOrdGreaterThan, /// (float a, float b) -> bool
117 LogicalFOrdNotEqual, /// (float a, float b) -> bool
118 LogicalFOrdGreaterEqual, /// (float a, float b) -> bool
119 LogicalFOrdered, /// (float a, float b) -> bool
120 LogicalFUnordered, /// (float a, float b) -> bool
121 LogicalFUnordLessThan, /// (float a, float b) -> bool
122 LogicalFUnordEqual, /// (float a, float b) -> bool
123 LogicalFUnordLessEqual, /// (float a, float b) -> bool
124 LogicalFUnordGreaterThan, /// (float a, float b) -> bool
125 LogicalFUnordNotEqual, /// (float a, float b) -> bool
126 LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
127
128 LogicalILessThan, /// (int a, int b) -> bool
129 LogicalIEqual, /// (int a, int b) -> bool
130 LogicalILessEqual, /// (int a, int b) -> bool
131 LogicalIGreaterThan, /// (int a, int b) -> bool
132 LogicalINotEqual, /// (int a, int b) -> bool
133 LogicalIGreaterEqual, /// (int a, int b) -> bool
134
135 LogicalULessThan, /// (uint a, uint b) -> bool
136 LogicalUEqual, /// (uint a, uint b) -> bool
137 LogicalULessEqual, /// (uint a, uint b) -> bool
138 LogicalUGreaterThan, /// (uint a, uint b) -> bool
139 LogicalUNotEqual, /// (uint a, uint b) -> bool
140 LogicalUGreaterEqual, /// (uint a, uint b) -> bool
141
142 LogicalAddCarry, /// (uint a, uint b) -> bool
143
144 Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
145 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
146 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
147 Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
148 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
149 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
150 Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
151 Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
152 Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
153 Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
154 Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
155 Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
156
157 Texture, /// (MetaTexture, float[N] coords) -> float4
158 TextureLod, /// (MetaTexture, float[N] coords) -> float4
159 TextureGather, /// (MetaTexture, float[N] coords) -> float4
160 TextureQueryDimensions, /// (MetaTexture, float a) -> float4
161 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
162 TexelFetch, /// (MetaTexture, int[N], int) -> float4
163 TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
164
165 ImageLoad, /// (MetaImage, int[N] coords) -> void
166 ImageStore, /// (MetaImage, int[N] coords) -> void
167
168 AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
169 AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
170 AtomicImageOr, /// (MetaImage, int[N] coords) -> void
171 AtomicImageXor, /// (MetaImage, int[N] coords) -> void
172 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
173
174 AtomicUExchange, /// (memory, uint) -> uint
175 AtomicUAdd, /// (memory, uint) -> uint
176 AtomicUMin, /// (memory, uint) -> uint
177 AtomicUMax, /// (memory, uint) -> uint
178 AtomicUAnd, /// (memory, uint) -> uint
179 AtomicUOr, /// (memory, uint) -> uint
180 AtomicUXor, /// (memory, uint) -> uint
181
182 AtomicIExchange, /// (memory, int) -> int
183 AtomicIAdd, /// (memory, int) -> int
184 AtomicIMin, /// (memory, int) -> int
185 AtomicIMax, /// (memory, int) -> int
186 AtomicIAnd, /// (memory, int) -> int
187 AtomicIOr, /// (memory, int) -> int
188 AtomicIXor, /// (memory, int) -> int
189
190 ReduceUAdd, /// (memory, uint) -> void
191 ReduceUMin, /// (memory, uint) -> void
192 ReduceUMax, /// (memory, uint) -> void
193 ReduceUAnd, /// (memory, uint) -> void
194 ReduceUOr, /// (memory, uint) -> void
195 ReduceUXor, /// (memory, uint) -> void
196
197 ReduceIAdd, /// (memory, int) -> void
198 ReduceIMin, /// (memory, int) -> void
199 ReduceIMax, /// (memory, int) -> void
200 ReduceIAnd, /// (memory, int) -> void
201 ReduceIOr, /// (memory, int) -> void
202 ReduceIXor, /// (memory, int) -> void
203
204 Branch, /// (uint branch_target) -> void
205 BranchIndirect, /// (uint branch_target) -> void
206 PushFlowStack, /// (uint branch_target) -> void
207 PopFlowStack, /// () -> void
208 Exit, /// () -> void
209 Discard, /// () -> void
210
211 EmitVertex, /// () -> void
212 EndPrimitive, /// () -> void
213
214 InvocationId, /// () -> int
215 YNegate, /// () -> float
216 LocalInvocationIdX, /// () -> uint
217 LocalInvocationIdY, /// () -> uint
218 LocalInvocationIdZ, /// () -> uint
219 WorkGroupIdX, /// () -> uint
220 WorkGroupIdY, /// () -> uint
221 WorkGroupIdZ, /// () -> uint
222
223 BallotThread, /// (bool) -> uint
224 VoteAll, /// (bool) -> bool
225 VoteAny, /// (bool) -> bool
226 VoteEqual, /// (bool) -> bool
227
228 ThreadId, /// () -> uint
229 ThreadEqMask, /// () -> uint
230 ThreadGeMask, /// () -> uint
231 ThreadGtMask, /// () -> uint
232 ThreadLeMask, /// () -> uint
233 ThreadLtMask, /// () -> uint
234 ShuffleIndexed, /// (uint value, uint index) -> uint
235
236 Barrier, /// () -> void
237 MemoryBarrierGroup, /// () -> void
238 MemoryBarrierGlobal, /// () -> void
239
240 Amount,
241};
242
243enum class InternalFlag {
244 Zero = 0,
245 Sign = 1,
246 Carry = 2,
247 Overflow = 3,
248 Amount = 4,
249};
250
251enum class MetaStackClass {
252 Ssy,
253 Pbk,
254};
255
256class OperationNode;
257class ConditionalNode;
258class GprNode;
259class CustomVarNode;
260class ImmediateNode;
261class InternalFlagNode;
262class PredicateNode;
263class AbufNode;
264class CbufNode;
265class LmemNode;
266class PatchNode;
267class SmemNode;
268class GmemNode;
269class CommentNode;
270
271using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
272 InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
273 LmemNode, SmemNode, GmemNode, CommentNode>;
274using Node = std::shared_ptr<NodeData>;
275using Node4 = std::array<Node, 4>;
276using NodeBlock = std::vector<Node>;
277
278struct ArraySamplerNode;
279struct BindlessSamplerNode;
280struct SeparateSamplerNode;
281
282using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
283using TrackSampler = std::shared_ptr<TrackSamplerData>;
284
285struct SamplerEntry {
286 /// Bound samplers constructor
287 explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
288 bool is_shadow_, bool is_buffer_, bool is_indexed_)
289 : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
290 is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
291
292 /// Separate sampler constructor
293 explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
294 Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
295 bool is_buffer_)
296 : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
297 buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
298 is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
299
300 /// Bindless samplers constructor
301 explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
302 bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
303 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
304 is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
305 }
306
307    u32 index = 0;            ///< Emulated index given for this sampler.
308 u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
309 u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
310 u32 buffer = 0; ///< Buffer where the bindless sampler is read.
311 u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
312 u32 size = 1; ///< Size of the sampler.
313
314 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
315 bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
316 bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
317 bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
318 bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
319 bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
320    bool is_separated = false; ///< Whether the image and sampler are separated or not.
321};
322
323/// Represents a tracked indexed (array) bindless sampler into a direct const buffer
324struct ArraySamplerNode {
325 u32 index;
326 u32 base_offset;
327 u32 bindless_var;
328};
329
330/// Represents a tracked separate sampler image pair that was folded statically
331struct SeparateSamplerNode {
332 std::pair<u32, u32> indices;
333 std::pair<u32, u32> offsets;
334};
335
336/// Represents a tracked bindless sampler into a direct const buffer
337struct BindlessSamplerNode {
338 u32 index;
339 u32 offset;
340};
341
342struct ImageEntry {
343public:
344 /// Bound images constructor
345 explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
346 : index{index_}, offset{offset_}, type{type_} {}
347
348    /// Bindless images constructor
349 explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
350 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
351
352 void MarkWrite() {
353 is_written = true;
354 }
355
356 void MarkRead() {
357 is_read = true;
358 }
359
360 void MarkAtomic() {
361 MarkWrite();
362 MarkRead();
363 is_atomic = true;
364 }
365
366 u32 index = 0;
367 u32 offset = 0;
368 u32 buffer = 0;
369
370 Tegra::Shader::ImageType type{};
371 bool is_bindless = false;
372 bool is_written = false;
373 bool is_read = false;
374 bool is_atomic = false;
375};
376
377struct GlobalMemoryBase {
378 u32 cbuf_index = 0;
379 u32 cbuf_offset = 0;
380
381 [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const {
382 return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
383 }
384};
385
386/// Parameters describing an arithmetic operation
387struct MetaArithmetic {
388    bool precise{}; ///< Whether the operation can be constrained or not
389};
390
391/// Parameters describing a texture sampler
392struct MetaTexture {
393 SamplerEntry sampler;
394 Node array;
395 Node depth_compare;
396 std::vector<Node> aoffi;
397 std::vector<Node> ptp;
398 std::vector<Node> derivates;
399 Node bias;
400 Node lod;
401 Node component;
402 u32 element{};
403 Node index;
404};
405
406struct MetaImage {
407 const ImageEntry& image;
408 std::vector<Node> values;
409 u32 element{};
410};
411
412/// Parameters that modify an operation but are not part of any particular operand
413using Meta =
414 std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
415
416class AmendNode {
417public:
418 [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const {
419 if (amend_index == amend_null_index) {
420 return std::nullopt;
421 }
422 return {amend_index};
423 }
424
425 void SetAmendIndex(std::size_t index) {
426 amend_index = index;
427 }
428
429 void ClearAmend() {
430 amend_index = amend_null_index;
431 }
432
433private:
434 static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
435 std::size_t amend_index{amend_null_index};
436};
437
438/// Holds any kind of operation that can be done in the IR
439class OperationNode final : public AmendNode {
440public:
441 explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {}
442
443 explicit OperationNode(OperationCode code_, Meta meta_)
444 : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {}
445
446 explicit OperationNode(OperationCode code_, std::vector<Node> operands_)
447 : OperationNode(code_, Meta{}, std::move(operands_)) {}
448
449 explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_)
450 : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {}
451
452 template <typename... Args>
453 explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_)
454 : code{code_}, meta{std::move(meta_)}, operands{operands_...} {}
455
456 [[nodiscard]] OperationCode GetCode() const {
457 return code;
458 }
459
460 [[nodiscard]] const Meta& GetMeta() const {
461 return meta;
462 }
463
464 [[nodiscard]] std::size_t GetOperandsCount() const {
465 return operands.size();
466 }
467
468 [[nodiscard]] const Node& operator[](std::size_t operand_index) const {
469 return operands.at(operand_index);
470 }
471
472private:
473 OperationCode code{};
474 Meta meta{};
475 std::vector<Node> operands;
476};
477
478/// Encloses conditionally-executed code guarded by a node that returns a boolean
479class ConditionalNode final : public AmendNode {
480public:
481 explicit ConditionalNode(Node condition_, std::vector<Node>&& code_)
482 : condition{std::move(condition_)}, code{std::move(code_)} {}
483
484 [[nodiscard]] const Node& GetCondition() const {
485 return condition;
486 }
487
488 [[nodiscard]] const std::vector<Node>& GetCode() const {
489 return code;
490 }
491
492private:
493 Node condition; ///< Condition to be satisfied
494 std::vector<Node> code; ///< Code to execute
495};
496
497/// A general purpose register
498class GprNode final {
499public:
500 explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {}
501
502 [[nodiscard]] constexpr u32 GetIndex() const {
503 return static_cast<u32>(index);
504 }
505
506private:
507 Tegra::Shader::Register index{};
508};
509
510/// A custom variable
511class CustomVarNode final {
512public:
513 explicit constexpr CustomVarNode(u32 index_) : index{index_} {}
514
515 [[nodiscard]] constexpr u32 GetIndex() const {
516 return index;
517 }
518
519private:
520 u32 index{};
521};
522
523/// A 32-bit value that represents an immediate value
524class ImmediateNode final {
525public:
526 explicit constexpr ImmediateNode(u32 value_) : value{value_} {}
527
528 [[nodiscard]] constexpr u32 GetValue() const {
529 return value;
530 }
531
532private:
533 u32 value{};
534};
535
536/// One of Maxwell's internal flags
537class InternalFlagNode final {
538public:
539 explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {}
540
541 [[nodiscard]] constexpr InternalFlag GetFlag() const {
542 return flag;
543 }
544
545private:
546 InternalFlag flag{};
547};
548
549/// A predicate register; it can be negated without additional nodes
550class PredicateNode final {
551public:
552 explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_)
553 : index{index_}, negated{negated_} {}
554
555 [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const {
556 return index;
557 }
558
559 [[nodiscard]] constexpr bool IsNegated() const {
560 return negated;
561 }
562
563private:
564 Tegra::Shader::Pred index{};
565 bool negated{};
566};
567
568/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
569class AbufNode final {
570public:
571 // Initialize for standard attributes (index is explicit).
572 explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {})
573 : buffer{std::move(buffer_)}, index{index_}, element{element_} {}
574
575 // Initialize for physical attributes (index is a variable value).
576 explicit AbufNode(Node physical_address_, Node buffer_ = {})
577 : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {}
578
579 [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const {
580 return index;
581 }
582
583 [[nodiscard]] u32 GetElement() const {
584 return element;
585 }
586
587 [[nodiscard]] const Node& GetBuffer() const {
588 return buffer;
589 }
590
591 [[nodiscard]] bool IsPhysicalBuffer() const {
592 return static_cast<bool>(physical_address);
593 }
594
595 [[nodiscard]] const Node& GetPhysicalAddress() const {
596 return physical_address;
597 }
598
599private:
600 Node physical_address;
601 Node buffer;
602 Tegra::Shader::Attribute::Index index{};
603 u32 element{};
604};
605
606/// Patch memory (used to communicate between tessellation stages).
607class PatchNode final {
608public:
609 explicit constexpr PatchNode(u32 offset_) : offset{offset_} {}
610
611 [[nodiscard]] constexpr u32 GetOffset() const {
612 return offset;
613 }
614
615private:
616 u32 offset{};
617};
618
619/// Constant buffer node, usually mapped to uniform buffers in GLSL
620class CbufNode final {
621public:
622 explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {}
623
624 [[nodiscard]] u32 GetIndex() const {
625 return index;
626 }
627
628 [[nodiscard]] const Node& GetOffset() const {
629 return offset;
630 }
631
632private:
633 u32 index{};
634 Node offset;
635};
636
637/// Local memory node
638class LmemNode final {
639public:
640 explicit LmemNode(Node address_) : address{std::move(address_)} {}
641
642 [[nodiscard]] const Node& GetAddress() const {
643 return address;
644 }
645
646private:
647 Node address;
648};
649
650/// Shared memory node
651class SmemNode final {
652public:
653 explicit SmemNode(Node address_) : address{std::move(address_)} {}
654
655 [[nodiscard]] const Node& GetAddress() const {
656 return address;
657 }
658
659private:
660 Node address;
661};
662
663/// Global memory node
664class GmemNode final {
665public:
666 explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_)
667 : real_address{std::move(real_address_)}, base_address{std::move(base_address_)},
668 descriptor{descriptor_} {}
669
670 [[nodiscard]] const Node& GetRealAddress() const {
671 return real_address;
672 }
673
674 [[nodiscard]] const Node& GetBaseAddress() const {
675 return base_address;
676 }
677
678 [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const {
679 return descriptor;
680 }
681
682private:
683 Node real_address;
684 Node base_address;
685 GlobalMemoryBase descriptor;
686};
687
688/// Commentary, can be dropped
689class CommentNode final {
690public:
691 explicit CommentNode(std::string text_) : text{std::move(text_)} {}
692
693 [[nodiscard]] const std::string& GetText() const {
694 return text;
695 }
696
697private:
698 std::string text;
699};
700
701} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
deleted file mode 100644
index 6a5b6940d..000000000
--- a/src/video_core/shader/node_helper.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <vector>
7
8#include "common/common_types.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14Node Conditional(Node condition, std::vector<Node> code) {
15 return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
16}
17
18Node Comment(std::string text) {
19 return MakeNode<CommentNode>(std::move(text));
20}
21
22Node Immediate(u32 value) {
23 return MakeNode<ImmediateNode>(value);
24}
25
26Node Immediate(s32 value) {
27 return Immediate(static_cast<u32>(value));
28}
29
30Node Immediate(f32 value) {
31 u32 integral;
32 std::memcpy(&integral, &value, sizeof(u32));
33 return Immediate(integral);
34}
35
36OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) {
37 if (is_signed) {
38 return operation_code;
39 }
40 switch (operation_code) {
41 case OperationCode::FCastInteger:
42 return OperationCode::FCastUInteger;
43 case OperationCode::IAdd:
44 return OperationCode::UAdd;
45 case OperationCode::IMul:
46 return OperationCode::UMul;
47 case OperationCode::IDiv:
48 return OperationCode::UDiv;
49 case OperationCode::IMin:
50 return OperationCode::UMin;
51 case OperationCode::IMax:
52 return OperationCode::UMax;
53 case OperationCode::ICastFloat:
54 return OperationCode::UCastFloat;
55 case OperationCode::ICastUnsigned:
56 return OperationCode::UCastSigned;
57 case OperationCode::ILogicalShiftLeft:
58 return OperationCode::ULogicalShiftLeft;
59 case OperationCode::ILogicalShiftRight:
60 return OperationCode::ULogicalShiftRight;
61 case OperationCode::IArithmeticShiftRight:
62 return OperationCode::UArithmeticShiftRight;
63 case OperationCode::IBitwiseAnd:
64 return OperationCode::UBitwiseAnd;
65 case OperationCode::IBitwiseOr:
66 return OperationCode::UBitwiseOr;
67 case OperationCode::IBitwiseXor:
68 return OperationCode::UBitwiseXor;
69 case OperationCode::IBitwiseNot:
70 return OperationCode::UBitwiseNot;
71 case OperationCode::IBitfieldExtract:
72 return OperationCode::UBitfieldExtract;
73 case OperationCode::IBitfieldInsert:
74 return OperationCode::UBitfieldInsert;
75 case OperationCode::IBitCount:
76 return OperationCode::UBitCount;
77 case OperationCode::LogicalILessThan:
78 return OperationCode::LogicalULessThan;
79 case OperationCode::LogicalIEqual:
80 return OperationCode::LogicalUEqual;
81 case OperationCode::LogicalILessEqual:
82 return OperationCode::LogicalULessEqual;
83 case OperationCode::LogicalIGreaterThan:
84 return OperationCode::LogicalUGreaterThan;
85 case OperationCode::LogicalINotEqual:
86 return OperationCode::LogicalUNotEqual;
87 case OperationCode::LogicalIGreaterEqual:
88 return OperationCode::LogicalUGreaterEqual;
89 case OperationCode::AtomicIExchange:
90 return OperationCode::AtomicUExchange;
91 case OperationCode::AtomicIAdd:
92 return OperationCode::AtomicUAdd;
93 case OperationCode::AtomicIMin:
94 return OperationCode::AtomicUMin;
95 case OperationCode::AtomicIMax:
96 return OperationCode::AtomicUMax;
97 case OperationCode::AtomicIAnd:
98 return OperationCode::AtomicUAnd;
99 case OperationCode::AtomicIOr:
100 return OperationCode::AtomicUOr;
101 case OperationCode::AtomicIXor:
102 return OperationCode::AtomicUXor;
103 case OperationCode::INegate:
104 UNREACHABLE_MSG("Can't negate an unsigned integer");
105 return {};
106 case OperationCode::IAbsolute:
107 UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
108 return {};
109 default:
110 UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
111 return {};
112 }
113}
114
115} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
deleted file mode 100644
index 1e0886185..000000000
--- a/src/video_core/shader/node_helper.h
+++ /dev/null
@@ -1,71 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <string>
9#include <tuple>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include "common/common_types.h"
15#include "video_core/shader/node.h"
16
17namespace VideoCommon::Shader {
18
19/// This arithmetic operation cannot be constrained
20inline constexpr MetaArithmetic PRECISE = {true};
21/// This arithmetic operation can be optimized away
22inline constexpr MetaArithmetic NO_PRECISE = {false};
23
24/// Creates a conditional node
25Node Conditional(Node condition, std::vector<Node> code);
26
27/// Creates a commentary node
28Node Comment(std::string text);
29
30/// Creates a u32 immediate
31Node Immediate(u32 value);
32
33/// Creates a s32 immediate
34Node Immediate(s32 value);
35
36/// Creates a f32 immediate
37Node Immediate(f32 value);
38
39/// Converts a signed operation code to an unsigned operation code
40OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
41
42template <typename T, typename... Args>
43Node MakeNode(Args&&... args) {
44 static_assert(std::is_convertible_v<T, NodeData>);
45 return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
46}
47
48template <typename T, typename... Args>
49TrackSampler MakeTrackSampler(Args&&... args) {
50 static_assert(std::is_convertible_v<T, TrackSamplerData>);
51 return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
52}
53
54template <typename... Args>
55Node Operation(OperationCode code, Args&&... args) {
56 if constexpr (sizeof...(args) == 0) {
57 return MakeNode<OperationNode>(code);
58 } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
59 Meta>) {
60 return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
61 } else {
62 return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
63 }
64}
65
66template <typename... Args>
67Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
68 return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
69}
70
71} // namespace VideoCommon::Shader
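The Operation() helper above injects a default Meta{} unless its first variadic argument is already convertible to Meta. A rough standalone sketch of that if constexpr dispatch on a toy node type (not part of the removed sources; ToyMeta, ToyOp and MakeOp are hypothetical names):

#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

struct ToyMeta { bool precise = false; };

struct ToyOp {
    int code;
    ToyMeta meta;
    std::vector<int> operands;
};

// Build a ToyOp; if the first argument is convertible to ToyMeta, forward it,
// otherwise inject a default-constructed one.
template <typename... Args>
ToyOp MakeOp(int code, Args&&... args) {
    if constexpr (sizeof...(Args) == 0) {
        return ToyOp{code, ToyMeta{}, {}};
    } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>, ToyMeta>) {
        return [code](ToyMeta meta, auto... operands) {
            return ToyOp{code, meta, {operands...}};
        }(std::forward<Args>(args)...);
    } else {
        return ToyOp{code, ToyMeta{}, {std::forward<Args>(args)...}};
    }
}

// Usage mirrors Operation(code, ...) versus Operation(code, meta, ...).
const ToyOp defaulted = MakeOp(1, 10, 20);
const ToyOp explicit_meta = MakeOp(1, ToyMeta{true}, 10, 20);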
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
deleted file mode 100644
index 148d91fcb..000000000
--- a/src/video_core/shader/registry.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "video_core/engines/kepler_compute.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/shader/registry.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Engines::ConstBufferEngineInterface;
18using Tegra::Engines::SamplerDescriptor;
19using Tegra::Engines::ShaderType;
20
21namespace {
22
23GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
24 if (shader_stage == ShaderType::Compute) {
25 return {};
26 }
27
28 auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
29
30 return {
31 .tfb_layouts = graphics.regs.tfb_layouts,
32 .tfb_varying_locs = graphics.regs.tfb_varying_locs,
33 .primitive_topology = graphics.regs.draw.topology,
34 .tessellation_primitive = graphics.regs.tess_mode.prim,
35 .tessellation_spacing = graphics.regs.tess_mode.spacing,
36 .tfb_enabled = graphics.regs.tfb_enabled != 0,
37 .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
38 };
39}
40
41ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
42 if (shader_stage != ShaderType::Compute) {
43 return {};
44 }
45
46 auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
47 const auto& launch = compute.launch_description;
48
49 return {
50 .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
51 .shared_memory_size_in_words = launch.shared_alloc,
52 .local_memory_size_in_words = launch.local_pos_alloc,
53 };
54}
55
56} // Anonymous namespace
57
58Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
59 : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
60 bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
61
62Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
63 : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
64 graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
65 shader_stage, engine_)} {}
66
67Registry::~Registry() = default;
68
69std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
70 const std::pair<u32, u32> key = {buffer, offset};
71 const auto iter = keys.find(key);
72 if (iter != keys.end()) {
73 return iter->second;
74 }
75 if (!engine) {
76 return std::nullopt;
77 }
78 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
79 keys.emplace(key, value);
80 return value;
81}
82
83std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
84 const u32 key = offset;
85 const auto iter = bound_samplers.find(key);
86 if (iter != bound_samplers.end()) {
87 return iter->second;
88 }
89 if (!engine) {
90 return std::nullopt;
91 }
92 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
93 bound_samplers.emplace(key, value);
94 return value;
95}
96
97std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
98 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
99 SeparateSamplerKey key;
100 key.buffers = buffers;
101 key.offsets = offsets;
102 const auto iter = separate_samplers.find(key);
103 if (iter != separate_samplers.end()) {
104 return iter->second;
105 }
106 if (!engine) {
107 return std::nullopt;
108 }
109
110 const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
111 const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
112 const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
113 separate_samplers.emplace(key, value);
114 return value;
115}
116
117std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
118 const std::pair key = {buffer, offset};
119 const auto iter = bindless_samplers.find(key);
120 if (iter != bindless_samplers.end()) {
121 return iter->second;
122 }
123 if (!engine) {
124 return std::nullopt;
125 }
126 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
127 bindless_samplers.emplace(key, value);
128 return value;
129}
130
131void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
132 keys.insert_or_assign({buffer, offset}, value);
133}
134
135void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
136 bound_samplers.insert_or_assign(offset, sampler);
137}
138
139void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
140 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
141}
142
143bool Registry::IsConsistent() const {
144 if (!engine) {
145 return true;
146 }
147 return std::all_of(keys.begin(), keys.end(),
148 [this](const auto& pair) {
149 const auto [cbuf, offset] = pair.first;
150 const auto value = pair.second;
151 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
152 }) &&
153 std::all_of(bound_samplers.begin(), bound_samplers.end(),
154 [this](const auto& sampler) {
155 const auto [key, value] = sampler;
156 return value == engine->AccessBoundSampler(stage, key);
157 }) &&
158 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
159 [this](const auto& sampler) {
160 const auto [cbuf, offset] = sampler.first;
161 const auto value = sampler.second;
162 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
163 });
164}
165
166bool Registry::HasEqualKeys(const Registry& rhs) const {
167 return std::tie(keys, bound_samplers, bindless_samplers) ==
168 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
169}
170
171const GraphicsInfo& Registry::GetGraphicsInfo() const {
172 ASSERT(stage != Tegra::Engines::ShaderType::Compute);
173 return graphics_info;
174}
175
176const ComputeInfo& Registry::GetComputeInfo() const {
177 ASSERT(stage == Tegra::Engines::ShaderType::Compute);
178 return compute_info;
179}
180
181} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
deleted file mode 100644
index 4bebefdde..000000000
--- a/src/video_core/shader/registry.h
+++ /dev/null
@@ -1,172 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include "common/common_types.h"
14#include "common/hash.h"
15#include "video_core/engines/const_buffer_engine_interface.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h"
18#include "video_core/guest_driver.h"
19
20namespace VideoCommon::Shader {
21
22struct SeparateSamplerKey {
23 std::pair<u32, u32> buffers;
24 std::pair<u32, u32> offsets;
25};
26
27} // namespace VideoCommon::Shader
28
29namespace std {
30
31template <>
32struct hash<VideoCommon::Shader::SeparateSamplerKey> {
33 std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
34 return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
35 key.offsets.second);
36 }
37};
38
39template <>
40struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
41 bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
42 const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
43 return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
44 }
45};
46
47} // namespace std
48
49namespace VideoCommon::Shader {
50
51using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
52using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
53using SeparateSamplerMap =
54 std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
55using BindlessSamplerMap =
56 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
57
58struct GraphicsInfo {
59 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
60
61 std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
62 tfb_layouts{};
63 std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
64 Maxwell::PrimitiveTopology primitive_topology{};
65 Maxwell::TessellationPrimitive tessellation_primitive{};
66 Maxwell::TessellationSpacing tessellation_spacing{};
67 bool tfb_enabled = false;
68 bool tessellation_clockwise = false;
69};
70static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
71 std::is_standard_layout_v<GraphicsInfo>);
72
73struct ComputeInfo {
74 std::array<u32, 3> workgroup_size{};
75 u32 shared_memory_size_in_words = 0;
76 u32 local_memory_size_in_words = 0;
77};
78static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
79
80struct SerializedRegistryInfo {
81 VideoCore::GuestDriverProfile guest_driver_profile;
82 u32 bound_buffer = 0;
83 GraphicsInfo graphics;
84 ComputeInfo compute;
85};
86
87/**
88 * The Registry is a class used to interface the 3D and compute engines with the shader compiler.
89 * With it, the shader can obtain required data from GPU state and store it for disk shader
90 * compilation.
91 */
92class Registry {
93public:
94 explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
95
96 explicit Registry(Tegra::Engines::ShaderType shader_stage,
97 Tegra::Engines::ConstBufferEngineInterface& engine_);
98
99 ~Registry();
100
101 /// Retrieves a key from the registry. If it is already registered, the stored value is
102 /// returned; otherwise it is obtained from maxwell3d and registered.
103 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
104
105 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
106
107 std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
108 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
109
110 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
111
112 /// Inserts a key.
113 void InsertKey(u32 buffer, u32 offset, u32 value);
114
115 /// Inserts a bound sampler key.
116 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
117
118 /// Inserts a bindless sampler key.
119 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
120
121 /// Checks keys and samplers against engine's current const buffers.
122 /// Returns true if they are the same value, false otherwise.
123 bool IsConsistent() const;
124
125 /// Returns true if the keys and samplers are equal to those of the other registry.
126 bool HasEqualKeys(const Registry& rhs) const;
127
128 /// Returns graphics information from this shader
129 const GraphicsInfo& GetGraphicsInfo() const;
130
131 /// Returns compute information from this shader
132 const ComputeInfo& GetComputeInfo() const;
133
134 /// Returns the const buffer keys stored in the database.
135 const KeyMap& GetKeys() const {
136 return keys;
137 }
138
139 /// Gets samplers database.
140 const BoundSamplerMap& GetBoundSamplers() const {
141 return bound_samplers;
142 }
143
144 /// Gets bindless samplers database.
145 const BindlessSamplerMap& GetBindlessSamplers() const {
146 return bindless_samplers;
147 }
148
149 /// Gets the bound buffer used by this shader.
150 u32 GetBoundBuffer() const {
151 return bound_buffer;
152 }
153
154 /// Obtains access to the guest driver's profile.
155 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
156 return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
157 }
158
159private:
160 const Tegra::Engines::ShaderType stage;
161 VideoCore::GuestDriverProfile stored_guest_driver_profile;
162 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
163 KeyMap keys;
164 BoundSamplerMap bound_samplers;
165 SeparateSamplerMap separate_samplers;
166 BindlessSamplerMap bindless_samplers;
167 u32 bound_buffer;
168 GraphicsInfo graphics_info;
169 ComputeInfo compute_info;
170};
171
172} // namespace VideoCommon::Shader
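
The std::hash and std::equal_to specializations above are what allow SeparateSamplerKey to act as an unordered_map key. The same pattern can be exercised in isolation with a hypothetical PairKey type (providing operator== makes a separate equal_to specialization unnecessary):

// Standalone example of hashing a composite key, mirroring the
// SeparateSamplerKey specializations above. PairKey is an illustrative name.
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <utility>

struct PairKey {
    std::pair<std::uint32_t, std::uint32_t> buffers;
    std::pair<std::uint32_t, std::uint32_t> offsets;

    bool operator==(const PairKey& rhs) const noexcept {
        return buffers == rhs.buffers && offsets == rhs.offsets;
    }
};

namespace std {
template <>
struct hash<PairKey> {
    std::size_t operator()(const PairKey& key) const noexcept {
        // Same XOR folding as the SeparateSamplerKey hash: cheap, but distinct
        // keys can collide, which unordered_map resolves through the equality check.
        return std::hash<std::uint32_t>{}(key.buffers.first ^ key.buffers.second ^
                                          key.offsets.first ^ key.offsets.second);
    }
};
} // namespace std

int main() {
    std::unordered_map<PairKey, int> map;
    map.emplace(PairKey{{1, 2}, {0x10, 0x20}}, 42);
    return map.count(PairKey{{1, 2}, {0x10, 0x20}}) == 1 ? 0 : 1;
}
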
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
deleted file mode 100644
index a4987ffc6..000000000
--- a/src/video_core/shader/shader_ir.cpp
+++ /dev/null
@@ -1,464 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cmath>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "common/logging/log.h"
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/node.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Attribute;
21using Tegra::Shader::Instruction;
22using Tegra::Shader::IpaMode;
23using Tegra::Shader::Pred;
24using Tegra::Shader::PredCondition;
25using Tegra::Shader::PredOperation;
26using Tegra::Shader::Register;
27
28ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_,
29 Registry& registry_)
30 : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{
31 registry_} {
32 Decode();
33 PostDecode();
34}
35
36ShaderIR::~ShaderIR() = default;
37
38Node ShaderIR::GetRegister(Register reg) {
39 if (reg != Register::ZeroIndex) {
40 used_registers.insert(static_cast<u32>(reg));
41 }
42 return MakeNode<GprNode>(reg);
43}
44
45Node ShaderIR::GetCustomVariable(u32 id) {
46 return MakeNode<CustomVarNode>(id);
47}
48
49Node ShaderIR::GetImmediate19(Instruction instr) {
50 return Immediate(instr.alu.GetImm20_19());
51}
52
53Node ShaderIR::GetImmediate32(Instruction instr) {
54 return Immediate(instr.alu.GetImm20_32());
55}
56
57Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
58 const auto index = static_cast<u32>(index_);
59 const auto offset = static_cast<u32>(offset_);
60
61 used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset);
62
63 return MakeNode<CbufNode>(index, Immediate(offset));
64}
65
66Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
67 const auto index = static_cast<u32>(index_);
68 const auto offset = static_cast<u32>(offset_);
69
70 used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect();
71
72 Node final_offset = [&] {
73 // Attempt to inline constant buffer without a variable offset. This is done to allow
74 // tracking LDC calls.
75 if (const auto gpr = std::get_if<GprNode>(&*node)) {
76 if (gpr->GetIndex() == Register::ZeroIndex) {
77 return Immediate(offset);
78 }
79 }
80 return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
81 }();
82 return MakeNode<CbufNode>(index, std::move(final_offset));
83}
84
85Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
86 const auto pred = static_cast<Pred>(pred_);
87 if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) {
88 used_predicates.insert(pred);
89 }
90
91 return MakeNode<PredicateNode>(pred, negated);
92}
93
94Node ShaderIR::GetPredicate(bool immediate) {
95 return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
96}
97
98Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
99 MarkAttributeUsage(index, element);
100 used_input_attributes.emplace(index);
101 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
102}
103
104Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
105 uses_physical_attributes = true;
106 return MakeNode<AbufNode>(GetRegister(physical_address), buffer);
107}
108
109Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
110 MarkAttributeUsage(index, element);
111 used_output_attributes.insert(index);
112 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
113}
114
115Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
116 Node node = MakeNode<InternalFlagNode>(flag);
117 if (negated) {
118 return Operation(OperationCode::LogicalNegate, std::move(node));
119 }
120 return node;
121}
122
123Node ShaderIR::GetLocalMemory(Node address) {
124 return MakeNode<LmemNode>(std::move(address));
125}
126
127Node ShaderIR::GetSharedMemory(Node address) {
128 return MakeNode<SmemNode>(std::move(address));
129}
130
131Node ShaderIR::GetTemporary(u32 id) {
132 return GetRegister(Register::ZeroIndex + 1 + id);
133}
134
135Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
136 if (absolute) {
137 value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
138 }
139 if (negate) {
140 value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
141 }
142 return value;
143}
144
145Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
146 if (!saturate) {
147 return value;
148 }
149
150 Node positive_zero = Immediate(std::copysignf(0, 1));
151 Node positive_one = Immediate(1.0f);
152 return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
153 std::move(positive_one));
154}
155
156Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
157 switch (size) {
158 case Register::Size::Byte:
159 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
160 std::move(value), Immediate(24));
161 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
162 std::move(value), Immediate(24));
163 return value;
164 case Register::Size::Short:
165 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
166 std::move(value), Immediate(16));
167 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
168 std::move(value), Immediate(16));
169 return value;
170 case Register::Size::Word:
171 // Default - do nothing
172 return value;
173 default:
174 UNREACHABLE_MSG("Unimplemented conversion size: {}", size);
175 return value;
176 }
177}
178
179Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
180 if (!is_signed) {
181 // Absolute or negate on an unsigned is pointless
182 return value;
183 }
184 if (absolute) {
185 value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
186 }
187 if (negate) {
188 value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
189 }
190 return value;
191}
192
193Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
194 Node value = Immediate(instr.half_imm.PackImmediates());
195 if (!has_negation) {
196 return value;
197 }
198
199 Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
200 Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
201
202 return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
203 std::move(second_negate));
204}
205
206Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
207 return Operation(OperationCode::HUnpack, type, std::move(value));
208}
209
210Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
211 switch (merge) {
212 case Tegra::Shader::HalfMerge::H0_H1:
213 return src;
214 case Tegra::Shader::HalfMerge::F32:
215 return Operation(OperationCode::HMergeF32, std::move(src));
216 case Tegra::Shader::HalfMerge::Mrg_H0:
217 return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
218 case Tegra::Shader::HalfMerge::Mrg_H1:
219 return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
220 }
221 UNREACHABLE();
222 return src;
223}
224
225Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
226 if (absolute) {
227 value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
228 }
229 if (negate) {
230 value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
231 GetPredicate(true));
232 }
233 return value;
234}
235
236Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
237 if (!saturate) {
238 return value;
239 }
240
241 Node positive_zero = Immediate(std::copysignf(0, 1));
242 Node positive_one = Immediate(1.0f);
243 return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
244 std::move(positive_one));
245}
246
247Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
248 if (condition == PredCondition::T) {
249 return GetPredicate(true);
250 } else if (condition == PredCondition::F) {
251 return GetPredicate(false);
252 }
253
254 static constexpr std::array comparison_table{
255 OperationCode(0),
256 OperationCode::LogicalFOrdLessThan, // LT
257 OperationCode::LogicalFOrdEqual, // EQ
258 OperationCode::LogicalFOrdLessEqual, // LE
259 OperationCode::LogicalFOrdGreaterThan, // GT
260 OperationCode::LogicalFOrdNotEqual, // NE
261 OperationCode::LogicalFOrdGreaterEqual, // GE
262 OperationCode::LogicalFOrdered, // NUM
263 OperationCode::LogicalFUnordered, // NAN
264 OperationCode::LogicalFUnordLessThan, // LTU
265 OperationCode::LogicalFUnordEqual, // EQU
266 OperationCode::LogicalFUnordLessEqual, // LEU
267 OperationCode::LogicalFUnordGreaterThan, // GTU
268 OperationCode::LogicalFUnordNotEqual, // NEU
269 OperationCode::LogicalFUnordGreaterEqual, // GEU
270 };
271 const std::size_t index = static_cast<std::size_t>(condition);
272 ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
273
274 return Operation(comparison_table[index], op_a, op_b);
275}
276
277Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
278 Node op_b) {
279 static constexpr std::array comparison_table{
280 std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
281 std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
282 std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
283 std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
284 std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
285 std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
286 };
287
288 const auto comparison =
289 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
290 [condition](const auto entry) { return condition == entry.first; });
291 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
292 "Unknown predicate comparison operation");
293
294 return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
295 std::move(op_b));
296}
297
298Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
299 Node op_b) {
300 static constexpr std::array comparison_table{
301 std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
302 std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
303 std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
304 std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
305 std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
306 std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
307 std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
308 std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
309 std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
310 std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
311 std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
312 };
313
314 const auto comparison =
315 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
316 [condition](const auto entry) { return condition == entry.first; });
317 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
318 "Unknown predicate comparison operation");
319
320 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
321}
322
323OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
324 static constexpr std::array operation_table{
325 OperationCode::LogicalAnd,
326 OperationCode::LogicalOr,
327 OperationCode::LogicalXor,
328 };
329
330 const auto index = static_cast<std::size_t>(operation);
331 if (index >= operation_table.size()) {
332 UNIMPLEMENTED_MSG("Unknown predicate operation.");
333 return {};
334 }
335
336 return operation_table[index];
337}
338
339Node ShaderIR::GetConditionCode(ConditionCode cc) const {
340 switch (cc) {
341 case ConditionCode::NEU:
342 return GetInternalFlag(InternalFlag::Zero, true);
343 case ConditionCode::FCSM_TR:
344 UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
345 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
346 default:
347 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc);
348 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
349 }
350}
351
352void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
353 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
354}
355
356void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
357 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
358}
359
360void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
361 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
362}
363
364void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
365 bb.push_back(
366 Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
367}
368
369void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) {
370 bb.push_back(
371 Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value)));
372}
373
374void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
375 SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
376}
377
378void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
379 if (!sets_cc) {
380 return;
381 }
382 Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
383 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
384 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
385}
386
387void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
388 if (!sets_cc) {
389 return;
390 }
391 Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
392 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
393 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
394}
395
396Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
397 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
398 Immediate(offset), Immediate(bits));
399}
400
401Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
402 return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset),
403 Immediate(bits));
404}
405
406void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
407 switch (index) {
408 case Attribute::Index::LayerViewportPointSize:
409 switch (element) {
410 case 0:
411 UNIMPLEMENTED();
412 break;
413 case 1:
414 uses_layer = true;
415 break;
416 case 2:
417 uses_viewport_index = true;
418 break;
419 case 3:
420 uses_point_size = true;
421 break;
422 }
423 break;
424 case Attribute::Index::TessCoordInstanceIDVertexID:
425 switch (element) {
426 case 2:
427 uses_instance_id = true;
428 break;
429 case 3:
430 uses_vertex_id = true;
431 break;
432 }
433 break;
434 case Attribute::Index::ClipDistances0123:
435 case Attribute::Index::ClipDistances4567: {
436 const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element;
437 used_clip_distances.at(clip_index) = true;
438 break;
439 }
440 case Attribute::Index::FrontColor:
441 case Attribute::Index::FrontSecondaryColor:
442 case Attribute::Index::BackColor:
443 case Attribute::Index::BackSecondaryColor:
444 uses_legacy_varyings = true;
445 break;
446 default:
447 if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) {
448 uses_legacy_varyings = true;
449 }
450 break;
451 }
452}
453
454std::size_t ShaderIR::DeclareAmend(Node new_amend) {
455 const auto id = amend_code.size();
456 amend_code.push_back(std::move(new_amend));
457 return id;
458}
459
460u32 ShaderIR::NewCustomVariable() {
461 return num_custom_variables++;
462}
463
464} // namespace VideoCommon::Shader
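
ConvertIntegerSize above widens Byte and Short register values by shifting them to the top of the 32-bit register and arithmetic-shifting them back down, which replicates the sign bit. A small standalone check of that trick (SignExtendByte is an illustrative name, not part of the yuzu sources):

// Shift-based sign extension as performed by ShaderIR::ConvertIntegerSize.
// Right-shifting a negative int is arithmetic on the two's-complement targets
// yuzu builds for (and is guaranteed to be since C++20).
#include <cassert>
#include <cstdint>

std::int32_t SignExtendByte(std::uint32_t value) {
    // Move the byte into bits 31..24, then shift back so bit 7 fills the upper bits.
    return static_cast<std::int32_t>(value << 24) >> 24;
}

int main() {
    assert(SignExtendByte(0x7F) == 127);  // positive values are unchanged
    assert(SignExtendByte(0xFF) == -1);   // 0xFF reads back as -1
    assert(SignExtendByte(0x80) == -128); // most negative byte
    return 0;
}
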
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
deleted file mode 100644
index 1cd7c14d7..000000000
--- a/src/video_core/shader/shader_ir.h
+++ /dev/null
@@ -1,479 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <list>
9#include <map>
10#include <optional>
11#include <set>
12#include <tuple>
13#include <vector>
14
15#include "common/common_types.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_bytecode.h"
18#include "video_core/engines/shader_header.h"
19#include "video_core/shader/ast.h"
20#include "video_core/shader/compiler_settings.h"
21#include "video_core/shader/memory_util.h"
22#include "video_core/shader/node.h"
23#include "video_core/shader/registry.h"
24
25namespace VideoCommon::Shader {
26
27struct ShaderBlock;
28
29constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
30
31struct ConstBuffer {
32 constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
33 : max_offset{max_offset_}, is_indirect{is_indirect_} {}
34
35 constexpr ConstBuffer() = default;
36
37 void MarkAsUsed(u64 offset) {
38 max_offset = std::max(max_offset, static_cast<u32>(offset));
39 }
40
41 void MarkAsUsedIndirect() {
42 is_indirect = true;
43 }
44
45 bool IsIndirect() const {
46 return is_indirect;
47 }
48
49 u32 GetSize() const {
50 return max_offset + static_cast<u32>(sizeof(float));
51 }
52
53 u32 GetMaxOffset() const {
54 return max_offset;
55 }
56
57private:
58 u32 max_offset = 0;
59 bool is_indirect = false;
60};
61
62struct GlobalMemoryUsage {
63 bool is_read{};
64 bool is_written{};
65};
66
67class ShaderIR final {
68public:
69 explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
70 CompilerSettings settings_, Registry& registry_);
71 ~ShaderIR();
72
73 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
74 return basic_blocks;
75 }
76
77 const std::set<u32>& GetRegisters() const {
78 return used_registers;
79 }
80
81 const std::set<Tegra::Shader::Pred>& GetPredicates() const {
82 return used_predicates;
83 }
84
85 const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
86 return used_input_attributes;
87 }
88
89 const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
90 return used_output_attributes;
91 }
92
93 const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
94 return used_cbufs;
95 }
96
97 const std::list<SamplerEntry>& GetSamplers() const {
98 return used_samplers;
99 }
100
101 const std::list<ImageEntry>& GetImages() const {
102 return used_images;
103 }
104
105 const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
106 const {
107 return used_clip_distances;
108 }
109
110 const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
111 return used_global_memory;
112 }
113
114 std::size_t GetLength() const {
115 return static_cast<std::size_t>(coverage_end * sizeof(u64));
116 }
117
118 bool UsesLayer() const {
119 return uses_layer;
120 }
121
122 bool UsesViewportIndex() const {
123 return uses_viewport_index;
124 }
125
126 bool UsesPointSize() const {
127 return uses_point_size;
128 }
129
130 bool UsesInstanceId() const {
131 return uses_instance_id;
132 }
133
134 bool UsesVertexId() const {
135 return uses_vertex_id;
136 }
137
138 bool UsesLegacyVaryings() const {
139 return uses_legacy_varyings;
140 }
141
142 bool UsesYNegate() const {
143 return uses_y_negate;
144 }
145
146 bool UsesWarps() const {
147 return uses_warps;
148 }
149
150 bool HasPhysicalAttributes() const {
151 return uses_physical_attributes;
152 }
153
154 const Tegra::Shader::Header& GetHeader() const {
155 return header;
156 }
157
158 bool IsFlowStackDisabled() const {
159 return disable_flow_stack;
160 }
161
162 bool IsDecompiled() const {
163 return decompiled;
164 }
165
166 const ASTManager& GetASTManager() const {
167 return program_manager;
168 }
169
170 ASTNode GetASTProgram() const {
171 return program_manager.GetProgram();
172 }
173
174 u32 GetASTNumVariables() const {
175 return program_manager.GetVariables();
176 }
177
178 u32 ConvertAddressToNvidiaSpace(u32 address) const {
179 return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
180 }
181
182 /// Returns a condition code evaluated from internal flags
183 Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
184
185 const Node& GetAmendNode(std::size_t index) const {
186 return amend_code[index];
187 }
188
189 u32 GetNumCustomVariables() const {
190 return num_custom_variables;
191 }
192
193private:
194 friend class ASTDecoder;
195
196 struct SamplerInfo {
197 std::optional<Tegra::Shader::TextureType> type;
198 std::optional<bool> is_array;
199 std::optional<bool> is_shadow;
200 std::optional<bool> is_buffer;
201
202 constexpr bool IsComplete() const noexcept {
203 return type && is_array && is_shadow && is_buffer;
204 }
205 };
206
207 void Decode();
208 void PostDecode();
209
210 NodeBlock DecodeRange(u32 begin, u32 end);
211 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
212 void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
213
214 /**
215 * Decodes a single instruction from Tegra to IR.
216 * @param bb Basic block where the nodes will be written to.
217 * @param pc Program counter. Offset to decode.
218 * @return Next address to decode.
219 */
220 u32 DecodeInstr(NodeBlock& bb, u32 pc);
221
222 u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
223 u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
224 u32 DecodeBfe(NodeBlock& bb, u32 pc);
225 u32 DecodeBfi(NodeBlock& bb, u32 pc);
226 u32 DecodeShift(NodeBlock& bb, u32 pc);
227 u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
228 u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
229 u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
230 u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
231 u32 DecodeFfma(NodeBlock& bb, u32 pc);
232 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
233 u32 DecodeConversion(NodeBlock& bb, u32 pc);
234 u32 DecodeWarp(NodeBlock& bb, u32 pc);
235 u32 DecodeMemory(NodeBlock& bb, u32 pc);
236 u32 DecodeTexture(NodeBlock& bb, u32 pc);
237 u32 DecodeImage(NodeBlock& bb, u32 pc);
238 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
239 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
240 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
241 u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
242 u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
243 u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
244 u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
245 u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
246 u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
247 u32 DecodeVideo(NodeBlock& bb, u32 pc);
248 u32 DecodeXmad(NodeBlock& bb, u32 pc);
249 u32 DecodeOther(NodeBlock& bb, u32 pc);
250
251 /// Generates a node for a passed register.
252 Node GetRegister(Tegra::Shader::Register reg);
253 /// Generates a node for a custom variable
254 Node GetCustomVariable(u32 id);
255 /// Generates a node representing a 19-bit immediate value
256 Node GetImmediate19(Tegra::Shader::Instruction instr);
257 /// Generates a node representing a 32-bit immediate value
258 Node GetImmediate32(Tegra::Shader::Instruction instr);
259 /// Generates a node representing a constant buffer
260 Node GetConstBuffer(u64 index, u64 offset);
261 /// Generates a node representing a constant buffer with a variable offset
262 Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
263 /// Generates a node for a passed predicate. It can be optionally negated
264 Node GetPredicate(u64 pred, bool negated = false);
265 /// Generates a predicate node for an immediate true or false value
266 Node GetPredicate(bool immediate);
267 /// Generates a node representing an input attribute. Keeps track of used attributes.
268 Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
269 /// Generates a node representing a physical input attribute.
270 Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
271 /// Generates a node representing an output attribute. Keeps track of used attributes.
272 Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
273 /// Generates a node representing an internal flag
274 Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
275 /// Generates a node representing a local memory address
276 Node GetLocalMemory(Node address);
277 /// Generates a node representing a shared memory address
278 Node GetSharedMemory(Node address);
279 /// Generates a temporary; internally it uses a post-RZ register
280 Node GetTemporary(u32 id);
281
282 /// Sets a register. src value must be a number-evaluated node.
283 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
284 /// Sets a predicate. src value must be a bool-evaluated node
285 void SetPredicate(NodeBlock& bb, u64 dest, Node src);
286 /// Sets an internal flag. src value must be a bool-evaluated node
287 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
288 /// Sets a local memory address with a value.
289 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
290 /// Sets a shared memory address with a value.
291 void SetSharedMemory(NodeBlock& bb, Node address, Node value);
292 /// Sets a temporary. Internally it uses a post-RZ register
293 void SetTemporary(NodeBlock& bb, u32 id, Node value);
294
295 /// Sets internal flags from a float
296 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
297 /// Sets internal flags from an integer
298 void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
299
300 /// Conditionally absolute/negated float. Absolute is applied first
301 Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
302 /// Conditionally saturates a float
303 Node GetSaturatedFloat(Node value, bool saturate = true);
304
305 /// Converts an integer to different sizes.
306 Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
307 /// Conditionally absolute/negated integer. Absolute is applied first
308 Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
309
310 /// Unpacks a half immediate from an instruction
311 Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
312 /// Unpacks a binary value into a half float pair with a type format
313 Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
314 /// Merges a half pair into another value
315 Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
316 /// Conditionally absolute/negated half float pair. Absolute is applied first
317 Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
318 /// Conditionally saturates a half float pair
319 Node GetSaturatedHalfFloat(Node value, bool saturate = true);
320
321 /// Get image component value by type and size
322 std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
323 u32 component_size, Node original_value);
324
325 /// Returns a predicate comparing two floats
326 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
327 /// Returns a predicate comparing two integers
328 Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
329 Node op_a, Node op_b);
330 /// Returns a predicate comparing two half floats. The condition determines how both pairs will be compared
331 Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
332
333 /// Returns a predicate combiner operation
334 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
335
336 /// Queries the missing sampler info from the execution context.
337 SamplerInfo GetSamplerInfo(SamplerInfo info,
338 std::optional<Tegra::Engines::SamplerDescriptor> sampler);
339
340 /// Accesses a texture sampler.
341 std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
342
343 /// Accesses a texture sampler for a bindless texture.
344 std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
345 Node& index_var);
346
347 /// Accesses an image.
348 ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
349
350 /// Access a bindless image sampler.
351 ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
352
353 /// Extracts a sequence of bits from a node
354 Node BitfieldExtract(Node value, u32 offset, u32 bits);
355
356 /// Inserts a sequence of bits from a node
357 Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
358
359 /// Marks the usage of an input or output attribute.
360 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
361
362 /// Decodes VMNMX instruction and inserts its code into the passed basic block.
363 void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
364
365 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
366 const Node4& components);
367
368 void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
369 const Node4& components, bool ignore_mask = false);
370 void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
371 const Node4& components, bool ignore_mask = false);
372
373 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
374 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
375 bool is_array, bool is_aoffi,
376 std::optional<Tegra::Shader::Register> bindless_reg);
377
378 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
379 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
380 bool is_array);
381
382 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
383 bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
384 bool is_bindless);
385
386 Node4 GetTldCode(Tegra::Shader::Instruction instr);
387
388 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
389 bool is_array);
390
391 std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
392 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
393 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
394
395 std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
396
397 std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
398
399 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
400 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
401 Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
402 std::optional<Tegra::Shader::Register> bindless_reg);
403
404 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
405 u64 byte_height);
406
407 void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
408 Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
409 Tegra::Shader::PredicateResultMode predicate_mode,
410 Tegra::Shader::Pred predicate, bool sets_cc);
411 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
412 Node op_c, Node imm_lut, bool sets_cc);
413
414 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
415
416 std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
417 s64 cursor);
418
419 std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
420 const OperationNode& operation,
421 Node gpr, Node base_offset,
422 Node tracked, const NodeBlock& code,
423 s64 cursor);
424
425 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
426
427 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
428 s64 cursor) const;
429
430 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
431 Tegra::Shader::Instruction instr,
432 bool is_read, bool is_write);
433
434 /// Register new amending code and obtain the reference id.
435 std::size_t DeclareAmend(Node new_amend);
436
437 u32 NewCustomVariable();
438
439 const ProgramCode& program_code;
440 const u32 main_offset;
441 const CompilerSettings settings;
442 Registry& registry;
443
444 bool decompiled{};
445 bool disable_flow_stack{};
446
447 u32 coverage_begin{};
448 u32 coverage_end{};
449
450 std::map<u32, NodeBlock> basic_blocks;
451 NodeBlock global_code;
452 ASTManager program_manager{true, true};
453 std::vector<Node> amend_code;
454 u32 num_custom_variables{};
455
456 std::set<u32> used_registers;
457 std::set<Tegra::Shader::Pred> used_predicates;
458 std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
459 std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
460 std::map<u32, ConstBuffer> used_cbufs;
461 std::list<SamplerEntry> used_samplers;
462 std::list<ImageEntry> used_images;
463 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
464 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
465 bool uses_layer{};
466 bool uses_viewport_index{};
467 bool uses_point_size{};
468 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
469 bool uses_instance_id{};
470 bool uses_vertex_id{};
471 bool uses_legacy_varyings{};
472 bool uses_y_negate{};
473 bool uses_warps{};
474 bool uses_indexed_samplers{};
475
476 Tegra::Shader::Header header;
477};
478
479} // namespace VideoCommon::Shader
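
The ConstBuffer helper above sizes each constant buffer from the highest offset the shader actually touched, plus one element. A tiny standalone version of that bookkeeping (UsageTracker is an illustrative name):

// Usage-size tracking in the style of ConstBuffer::MarkAsUsed / GetSize.
#include <algorithm>
#include <cassert>
#include <cstdint>

struct UsageTracker {
    void MarkAsUsed(std::uint32_t offset) {
        max_offset = std::max(max_offset, offset);
    }
    std::uint32_t GetSize() const {
        // One float past the highest offset that was read.
        return max_offset + static_cast<std::uint32_t>(sizeof(float));
    }
    std::uint32_t max_offset = 0;
};

int main() {
    UsageTracker cbuf;
    cbuf.MarkAsUsed(0x10);
    cbuf.MarkAsUsed(0x40);
    assert(cbuf.GetSize() == 0x44); // 0x40 + sizeof(float)
    return 0;
}
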
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
deleted file mode 100644
index 6be3ea92b..000000000
--- a/src/video_core/shader/track.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7#include <variant>
8
9#include "common/common_types.h"
10#include "video_core/shader/node.h"
11#include "video_core/shader/node_helper.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16namespace {
17
18std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
19 OperationCode operation_code) {
20 for (; cursor >= 0; --cursor) {
21 Node node = code.at(cursor);
22
23 if (const auto operation = std::get_if<OperationNode>(&*node)) {
24 if (operation->GetCode() == operation_code) {
25 return {std::move(node), cursor};
26 }
27 }
28
29 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
30 const auto& conditional_code = conditional->GetCode();
31 auto result = FindOperation(
32 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
33 auto& found = result.first;
34 if (found) {
35 return {std::move(found), cursor};
36 }
37 }
38 }
39 return {};
40}
41
42std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
43 if (operation.GetCode() != OperationCode::UAdd) {
44 return std::nullopt;
45 }
46 Node gpr;
47 Node offset;
48 ASSERT(operation.GetOperandsCount() == 2);
49 for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
50 Node operand = operation[i];
51 if (std::holds_alternative<ImmediateNode>(*operand)) {
52 offset = operation[i];
53 } else if (std::holds_alternative<GprNode>(*operand)) {
54 gpr = operation[i];
55 }
56 }
57 if (offset && gpr) {
58 return std::make_pair(gpr, offset);
59 }
60 return std::nullopt;
61}
62
63bool AmendNodeCv(std::size_t amend_index, Node node) {
64 if (const auto operation = std::get_if<OperationNode>(&*node)) {
65 operation->SetAmendIndex(amend_index);
66 return true;
67 }
68 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
69 conditional->SetAmendIndex(amend_index);
70 return true;
71 }
72 return false;
73}
74
75} // Anonymous namespace
76
77std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
78 s64 cursor) {
79 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
80 const u32 cbuf_index = cbuf->GetIndex();
81
82 // Constant buffer found, test if it's an immediate
83 const auto& offset = cbuf->GetOffset();
84 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
85 auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
86 return {tracked, track};
87 }
88 if (const auto operation = std::get_if<OperationNode>(&*offset)) {
89 const u32 bound_buffer = registry.GetBoundBuffer();
90 if (bound_buffer != cbuf_index) {
91 return {};
92 }
93 if (const std::optional pair = DecoupleIndirectRead(*operation)) {
94 auto [gpr, base_offset] = *pair;
95 return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
96 code, cursor);
97 }
98 }
99 return {};
100 }
101 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
102 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
103 return {};
104 }
105 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
106 // register that it uses as an operand
107 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
108 if (!source) {
109 return {};
110 }
111 return TrackBindlessSampler(source, code, new_cursor);
112 }
113 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
114 const OperationNode& op = *operation;
115
116 const OperationCode opcode = operation->GetCode();
117 if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
118 ASSERT(op.GetOperandsCount() == 2);
119 auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
120 auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
121 if (node_a && node_b) {
122 auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
123 std::pair{offset_a, offset_b});
124 return {tracked, std::move(track)};
125 }
126 }
127 std::size_t i = op.GetOperandsCount();
128 while (i--) {
129 if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) {
130 // Constant buffer found in operand.
131 return found;
132 }
133 }
134 return {};
135 }
136 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
137 const auto& conditional_code = conditional->GetCode();
138 return TrackBindlessSampler(tracked, conditional_code,
139 static_cast<s64>(conditional_code.size()));
140 }
141 return {};
142}
143
144std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
145 const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
146 const NodeBlock& code, s64 cursor) {
147 const auto offset_imm = std::get<ImmediateNode>(*base_offset);
148 const auto& gpu_driver = registry.AccessGuestDriverProfile();
149 const u32 bindless_cv = NewCustomVariable();
150 const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
151 Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
152
153 Node cv_node = GetCustomVariable(bindless_cv);
154 Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
155 const std::size_t amend_index = DeclareAmend(std::move(amend_op));
156 AmendNodeCv(amend_index, code[cursor]);
157
158 // TODO: Implement bindless index custom variable
159 auto track =
160 MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
161 return {tracked, track};
162}
163
164std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
165 s64 cursor) const {
166 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
167 // Constant buffer found, test if it's an immediate
168 const auto& offset = cbuf->GetOffset();
169 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
170 return {tracked, cbuf->GetIndex(), immediate->GetValue()};
171 }
172 return {};
173 }
174 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
175 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
176 return {};
177 }
178 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
179 // register that it uses as an operand
180 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
181 if (!source) {
182 return {};
183 }
184 return TrackCbuf(source, code, new_cursor);
185 }
186 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
187 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
188 if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) {
189 // Cbuf found in operand.
190 return found;
191 }
192 }
193 return {};
194 }
195 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
196 const auto& conditional_code = conditional->GetCode();
197 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
198 }
199 return {};
200}
201
202std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
203 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same register
204 // that it uses as an operand
205 const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
206 const auto& found = result.first;
207 if (!found) {
208 return std::nullopt;
209 }
210 if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
211 return immediate->GetValue();
212 }
213 return std::nullopt;
214}
215
216std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
217 s64 cursor) const {
218 for (; cursor >= 0; --cursor) {
219 const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
220 if (!found_node) {
221 return {};
222 }
223 const auto operation = std::get_if<OperationNode>(&*found_node);
224 ASSERT(operation);
225
226 const auto& target = (*operation)[0];
227 if (const auto gpr_target = std::get_if<GprNode>(&*target)) {
228 if (gpr_target->GetIndex() == tracked->GetIndex()) {
229 return {(*operation)[1], new_cursor};
230 }
231 }
232 }
233 return {};
234}
235
236} // namespace VideoCommon::Shader
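
TrackRegister above answers the question "what was last written to this register before the cursor?" by walking the block backwards until it finds a matching Assign operation. Stripped of the IR node machinery, the search reduces to something like the following sketch (Assignment and FindLastWrite are illustrative names):

// Simplified sketch of the backwards register tracking in track.cpp: starting
// from a cursor, walk the block in reverse and return the value last assigned
// to the register of interest.
#include <cstdint>
#include <optional>
#include <vector>

struct Assignment {
    std::uint32_t dest_register;
    std::int32_t value; // stand-in for the assigned IR node
};

std::optional<std::int32_t> FindLastWrite(const std::vector<Assignment>& block,
                                          std::int64_t cursor, std::uint32_t reg) {
    for (; cursor >= 0; --cursor) {
        const Assignment& assign = block[static_cast<std::size_t>(cursor)];
        if (assign.dest_register == reg) {
            return assign.value;
        }
    }
    return std::nullopt; // no write to the register before the cursor
}

int main() {
    const std::vector<Assignment> block{{1, 10}, {2, 20}, {1, 30}};
    // Searching backwards from the end finds the most recent write to r1.
    return FindLastWrite(block, 2, 1) == 30 ? 0 : 1;
}
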
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
deleted file mode 100644
index 22a933761..000000000
--- a/src/video_core/shader/transform_feedback.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/shader/registry.h"
13#include "video_core/shader/transform_feedback.h"
14
15namespace VideoCommon::Shader {
16
17namespace {
18
19using Maxwell = Tegra::Engines::Maxwell3D::Regs;
20
21// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
22
23/// Attribute offsets that describe a vector
24constexpr std::array VECTORS = {
25 28, // gl_Position
26 32, // Generic 0
27 36, // Generic 1
28 40, // Generic 2
29 44, // Generic 3
30 48, // Generic 4
31 52, // Generic 5
32 56, // Generic 6
33 60, // Generic 7
34 64, // Generic 8
35 68, // Generic 9
36 72, // Generic 10
37 76, // Generic 11
38 80, // Generic 12
39 84, // Generic 13
40 88, // Generic 14
41 92, // Generic 15
42 96, // Generic 16
43 100, // Generic 17
44 104, // Generic 18
45 108, // Generic 19
46 112, // Generic 20
47 116, // Generic 21
48 120, // Generic 22
49 124, // Generic 23
50 128, // Generic 24
51 132, // Generic 25
52 136, // Generic 26
53 140, // Generic 27
54 144, // Generic 28
55 148, // Generic 29
56 152, // Generic 30
57 156, // Generic 31
58 160, // gl_FrontColor
59 164, // gl_FrontSecondaryColor
60 160, // gl_BackColor
61 164, // gl_BackSecondaryColor
62 192, // gl_TexCoord[0]
63 196, // gl_TexCoord[1]
64 200, // gl_TexCoord[2]
65 204, // gl_TexCoord[3]
66 208, // gl_TexCoord[4]
67 212, // gl_TexCoord[5]
68 216, // gl_TexCoord[6]
69 220, // gl_TexCoord[7]
70};
71} // namespace
72
73std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
74
75 std::unordered_map<u8, VaryingTFB> tfb;
76
77 for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
78 const auto& locations = info.tfb_varying_locs[buffer];
79 const auto& layout = info.tfb_layouts[buffer];
80 const std::size_t varying_count = layout.varying_count;
81
82 std::size_t highest = 0;
83
84 for (std::size_t offset = 0; offset < varying_count; ++offset) {
85 const std::size_t base_offset = offset;
86 const u8 location = locations[offset];
87
88 VaryingTFB varying;
89 varying.buffer = layout.stream;
90 varying.stride = layout.stride;
91 varying.offset = offset * sizeof(u32);
92 varying.components = 1;
93
94 if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
95 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
96
97 const u8 base_index = location / 4;
98 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
99 ++offset;
100 ++varying.components;
101 }
102 }
103
104 [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
105 UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
106
107 highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
108 }
109
110 UNIMPLEMENTED_IF(highest != layout.stride);
111 }
112 return tfb;
113}
114
115} // namespace VideoCommon::Shader
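
BuildTransformFeedback above merges consecutive varying locations that belong to the same vec4 (same location / 4) into a single entry with a component count. A simplified standalone version of that coalescing loop (PackedVarying and CoalesceLocations are illustrative names):

// Simplified sketch of the component coalescing in BuildTransformFeedback.
#include <cstddef>
#include <cstdint>
#include <vector>

struct PackedVarying {
    std::uint8_t base_location;
    std::uint32_t components;
};

std::vector<PackedVarying> CoalesceLocations(const std::vector<std::uint8_t>& locations) {
    std::vector<PackedVarying> result;
    for (std::size_t i = 0; i < locations.size(); ++i) {
        PackedVarying varying{locations[i], 1};
        // Extend the entry while the next location stays inside the same vec4.
        while (i + 1 < locations.size() && locations[i + 1] / 4 == locations[i] / 4) {
            ++i;
            ++varying.components;
        }
        result.push_back(varying);
    }
    return result;
}

int main() {
    // Locations 32..35 are the four components of generic attribute 0.
    const std::vector<PackedVarying> packed = CoalesceLocations({32, 33, 34, 35, 40});
    return (packed.size() == 2 && packed[0].components == 4) ? 0 : 1;
}
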
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
deleted file mode 100644
index 77d05f64c..000000000
--- a/src/video_core/shader/transform_feedback.h
+++ /dev/null
@@ -1,23 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/shader/registry.h"
11
12namespace VideoCommon::Shader {
13
14struct VaryingTFB {
15 std::size_t buffer;
16 std::size_t stride;
17 std::size_t offset;
18 std::size_t components;
19};
20
21std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
22
23} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
new file mode 100644
index 000000000..78bf90c48
--- /dev/null
+++ b/src/video_core/shader_cache.cpp
@@ -0,0 +1,250 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <vector>
8
9#include "common/assert.h"
10#include "shader_recompiler/frontend/maxwell/control_flow.h"
11#include "shader_recompiler/object_pool.h"
12#include "video_core/dirty_flags.h"
13#include "video_core/engines/kepler_compute.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/memory_manager.h"
16#include "video_core/shader_cache.h"
17#include "video_core/shader_environment.h"
18
19namespace VideoCommon {
20
21void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
22 std::scoped_lock lock{invalidation_mutex};
23 InvalidatePagesInRegion(addr, size);
24 RemovePendingShaders();
25}
26
27void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
28 std::lock_guard lock{invalidation_mutex};
29 InvalidatePagesInRegion(addr, size);
30}
31
32void ShaderCache::SyncGuestHost() {
33 std::scoped_lock lock{invalidation_mutex};
34 RemovePendingShaders();
35}
36
37ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
38 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
39 Tegra::Engines::KeplerCompute& kepler_compute_)
40 : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
41 rasterizer{rasterizer_} {}
42
43bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
44 auto& dirty{maxwell3d.dirty.flags};
45 if (!dirty[VideoCommon::Dirty::Shaders]) {
46 return last_shaders_valid;
47 }
48 dirty[VideoCommon::Dirty::Shaders] = false;
49
50 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
51 for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
52 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
53 unique_hashes[index] = 0;
54 continue;
55 }
56 const auto& shader_config{maxwell3d.regs.shader_config[index]};
57 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
58 const GPUVAddr shader_addr{base_addr + shader_config.offset};
59 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
60 if (!cpu_shader_addr) {
61 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
62 last_shaders_valid = false;
63 return false;
64 }
65 const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
66 if (!shader_info) {
67 const u32 start_address{shader_config.offset};
68 GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
69 shader_info = MakeShaderInfo(env, *cpu_shader_addr);
70 }
71 shader_infos[index] = shader_info;
72 unique_hashes[index] = shader_info->unique_hash;
73 }
74 last_shaders_valid = true;
75 return true;
76}
77
78const ShaderInfo* ShaderCache::ComputeShader() {
79 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
80 const auto& qmd{kepler_compute.launch_description};
81 const GPUVAddr shader_addr{program_base + qmd.program_start};
82 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
83 if (!cpu_shader_addr) {
84 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
85 return nullptr;
86 }
87 if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
88 return shader;
89 }
90 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
91 return MakeShaderInfo(env, *cpu_shader_addr);
92}
93
94void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
95 const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
96 size_t env_index{};
97 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
98 for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
99 if (unique_hashes[index] == 0) {
100 continue;
101 }
102 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
103 auto& env{result.envs[index]};
104 const u32 start_address{maxwell3d.regs.shader_config[index].offset};
105 env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
106 env.SetCachedSize(shader_infos[index]->size_bytes);
107 result.env_ptrs[env_index++] = &env;
108 }
109}
110
111ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
112 std::scoped_lock lock{lookup_mutex};
113
114 const auto it = lookup_cache.find(addr);
115 if (it == lookup_cache.end()) {
116 return nullptr;
117 }
118 return it->second->data;
119}
120
121void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
122 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
123
124 const VAddr addr_end = addr + size;
125 Entry* const entry = NewEntry(addr, addr_end, data.get());
126
127 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
128 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
129 invalidation_cache[page].push_back(entry);
130 }
131
132 storage.push_back(std::move(data));
133
134 rasterizer.UpdatePagesCachedCount(addr, size, 1);
135}
136
137void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
138 const VAddr addr_end = addr + size;
139 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
140 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
141 auto it = invalidation_cache.find(page);
142 if (it == invalidation_cache.end()) {
143 continue;
144 }
145 InvalidatePageEntries(it->second, addr, addr_end);
146 }
147}
148
149void ShaderCache::RemovePendingShaders() {
150 if (marked_for_removal.empty()) {
151 return;
152 }
153 // Remove duplicates
154 std::ranges::sort(marked_for_removal);
155 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
156 marked_for_removal.end());
157
158 std::vector<ShaderInfo*> removed_shaders;
159 removed_shaders.reserve(marked_for_removal.size());
160
161 std::scoped_lock lock{lookup_mutex};
162
163 for (Entry* const entry : marked_for_removal) {
164 removed_shaders.push_back(entry->data);
165
166 const auto it = lookup_cache.find(entry->addr_start);
167 ASSERT(it != lookup_cache.end());
168 lookup_cache.erase(it);
169 }
170 marked_for_removal.clear();
171
172 if (!removed_shaders.empty()) {
173 RemoveShadersFromStorage(std::move(removed_shaders));
174 }
175}
176
177void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
178 size_t index = 0;
179 while (index < entries.size()) {
180 Entry* const entry = entries[index];
181 if (!entry->Overlaps(addr, addr_end)) {
182 ++index;
183 continue;
184 }
185
186 UnmarkMemory(entry);
187 RemoveEntryFromInvalidationCache(entry);
188 marked_for_removal.push_back(entry);
189 }
190}
191
192void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
193 const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
194 for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
195 const auto entries_it = invalidation_cache.find(page);
196 ASSERT(entries_it != invalidation_cache.end());
197 std::vector<Entry*>& entries = entries_it->second;
198
199 const auto entry_it = std::ranges::find(entries, entry);
200 ASSERT(entry_it != entries.end());
201 entries.erase(entry_it);
202 }
203}
204
205void ShaderCache::UnmarkMemory(Entry* entry) {
206 if (!entry->is_memory_marked) {
207 return;
208 }
209 entry->is_memory_marked = false;
210
211 const VAddr addr = entry->addr_start;
212 const size_t size = entry->addr_end - addr;
213 rasterizer.UpdatePagesCachedCount(addr, size, -1);
214}
215
216void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) {
217 // Remove them from the cache
218 std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
219 return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
220 });
221}
222
223ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
224 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
225 Entry* const entry_pointer = entry.get();
226
227 lookup_cache.emplace(addr, std::move(entry));
228 return entry_pointer;
229}
230
231const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
232 auto info = std::make_unique<ShaderInfo>();
233 if (const std::optional<u64> cached_hash{env.Analyze()}) {
234 info->unique_hash = *cached_hash;
235 info->size_bytes = env.CachedSize();
236 } else {
237 // Slow path, rarely hit by commercial games
238 // Build a control flow graph to get the real shader size
239 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
240 Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
241 info->unique_hash = env.CalculateHash();
242 info->size_bytes = env.ReadSize();
243 }
244 const size_t size_bytes{info->size_bytes};
245 const ShaderInfo* const result{info.get()};
246 Register(std::move(info), cpu_addr, size_bytes);
247 return result;
248}
249
250} // namespace VideoCommon
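
The invalidation and registration paths above bucket shaders into 16 KiB pages (PAGE_BITS = 14). Below is a minimal standalone sketch of that page-range arithmetic only; the addr and size values are hypothetical and chosen purely for illustration.

#include <cstdint>
#include <cstdio>

constexpr std::uint64_t PAGE_BITS = 14;
constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;

int main() {
    // Hypothetical shader placement; only the arithmetic mirrors the cache.
    const std::uint64_t addr = 0x12340000;
    const std::uint64_t size = 0x9000;
    const std::uint64_t addr_end = addr + size;
    // First page containing addr, and one past the last page the range touches.
    const std::uint64_t first_page = addr >> PAGE_BITS;
    const std::uint64_t page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
    for (std::uint64_t page = first_page; page < page_end; ++page) {
        std::printf("bucket 0x%llx\n", static_cast<unsigned long long>(page));
    }
    return 0;
}
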
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 015a789d6..136fe294c 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -5,226 +5,147 @@
5#pragma once 5#pragma once
6 6
7#include <algorithm> 7#include <algorithm>
8#include <array>
8#include <memory> 9#include <memory>
9#include <mutex> 10#include <mutex>
11#include <span>
10#include <unordered_map> 12#include <unordered_map>
11#include <utility> 13#include <utility>
12#include <vector> 14#include <vector>
13 15
14#include "common/assert.h"
15#include "common/common_types.h" 16#include "common/common_types.h"
16#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
18#include "video_core/shader_environment.h"
19
20namespace Tegra {
21class MemoryManager;
22}
17 23
18namespace VideoCommon { 24namespace VideoCommon {
19 25
20template <class T> 26class GenericEnvironment;
27
28struct ShaderInfo {
29 u64 unique_hash{};
30 size_t size_bytes{};
31};
32
21class ShaderCache { 33class ShaderCache {
22 static constexpr u64 PAGE_BITS = 14; 34 static constexpr u64 PAGE_BITS = 14;
23 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; 35 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
24 36
37 static constexpr size_t NUM_PROGRAMS = 6;
38
25 struct Entry { 39 struct Entry {
26 VAddr addr_start; 40 VAddr addr_start;
27 VAddr addr_end; 41 VAddr addr_end;
28 T* data; 42 ShaderInfo* data;
29 43
30 bool is_memory_marked = true; 44 bool is_memory_marked = true;
31 45
32 constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { 46 bool Overlaps(VAddr start, VAddr end) const noexcept {
33 return start < addr_end && addr_start < end; 47 return start < addr_end && addr_start < end;
34 } 48 }
35 }; 49 };
36 50
37public: 51public:
38 virtual ~ShaderCache() = default;
39
40 /// @brief Removes shaders inside a given region 52 /// @brief Removes shaders inside a given region
41 /// @note Checks for ranges 53 /// @note Checks for ranges
42 /// @param addr Start address of the invalidation 54 /// @param addr Start address of the invalidation
43 /// @param size Number of bytes of the invalidation 55 /// @param size Number of bytes of the invalidation
44 void InvalidateRegion(VAddr addr, std::size_t size) { 56 void InvalidateRegion(VAddr addr, size_t size);
45 std::scoped_lock lock{invalidation_mutex};
46 InvalidatePagesInRegion(addr, size);
47 RemovePendingShaders();
48 }
49 57
50 /// @brief Unmarks a memory region as cached and marks it for removal 58 /// @brief Unmarks a memory region as cached and marks it for removal
51 /// @param addr Start address of the CPU write operation 59 /// @param addr Start address of the CPU write operation
52 /// @param size Number of bytes of the CPU write operation 60 /// @param size Number of bytes of the CPU write operation
53 void OnCPUWrite(VAddr addr, std::size_t size) { 61 void OnCPUWrite(VAddr addr, size_t size);
54 std::lock_guard lock{invalidation_mutex};
55 InvalidatePagesInRegion(addr, size);
56 }
57 62
58 /// @brief Flushes delayed removal operations 63 /// @brief Flushes delayed removal operations
59 void SyncGuestHost() { 64 void SyncGuestHost();
60 std::scoped_lock lock{invalidation_mutex};
61 RemovePendingShaders();
62 }
63 65
64 /// @brief Tries to obtain a cached shader starting in a given address 66protected:
65 /// @note Doesn't check for ranges, the given address has to be the start of the shader 67 struct GraphicsEnvironments {
66 /// @param addr Start address of the shader, this doesn't cache for region 68 std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
67 /// @return Pointer to a valid shader, nullptr when nothing is found 69 std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
68 T* TryGet(VAddr addr) const {
69 std::scoped_lock lock{lookup_mutex};
70 70
71 const auto it = lookup_cache.find(addr); 71 std::span<Shader::Environment* const> Span() const noexcept {
72 if (it == lookup_cache.end()) { 72 return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
73 return nullptr;
74 } 73 }
75 return it->second->data; 74 };
76 }
77
78protected:
79 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
80 75
81 /// @brief Register in the cache a given entry 76 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
82 /// @param data Shader to store in the cache 77 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
83 /// @param addr Start address of the shader that will be registered 78 Tegra::Engines::KeplerCompute& kepler_compute_);
84 /// @param size Size in bytes of the shader
85 void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
86 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
87 79
88 const VAddr addr_end = addr + size; 80 /// @brief Update the hashes and information of shader stages
89 Entry* const entry = NewEntry(addr, addr_end, data.get()); 81 /// @param unique_hashes Shader hashes to store into when a stage is enabled
 82 /// @return True on success, false on error
83 bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes);
90 84
91 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; 85 /// @brief Returns information about the current compute shader
92 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { 86 /// @return Pointer to a valid shader, nullptr on error
93 invalidation_cache[page].push_back(entry); 87 const ShaderInfo* ComputeShader();
94 }
95 88
96 storage.push_back(std::move(data)); 89 /// @brief Collect the current graphics environments
90 void GetGraphicsEnvironments(GraphicsEnvironments& result,
91 const std::array<u64, NUM_PROGRAMS>& unique_hashes);
97 92
98 rasterizer.UpdatePagesCachedCount(addr, size, 1); 93 Tegra::MemoryManager& gpu_memory;
99 } 94 Tegra::Engines::Maxwell3D& maxwell3d;
95 Tegra::Engines::KeplerCompute& kepler_compute;
100 96
101 /// @brief Called when a shader is going to be removed 97 std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
102 /// @param shader Shader that will be removed 98 bool last_shaders_valid = false;
103 /// @pre invalidation_cache is locked
104 /// @pre lookup_mutex is locked
105 virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
106 99
107private: 100private:
 101 /// @brief Tries to obtain a cached shader starting at a given address
 102 /// @note Doesn't check for ranges; the given address has to be the start of the shader
 103 /// @param addr Start address of the shader; the lookup does not search within a region
104 /// @return Pointer to a valid shader, nullptr when nothing is found
105 ShaderInfo* TryGet(VAddr addr) const;
106
107 /// @brief Register in the cache a given entry
108 /// @param data Shader to store in the cache
109 /// @param addr Start address of the shader that will be registered
110 /// @param size Size in bytes of the shader
111 void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size);
112
108 /// @brief Invalidate pages in a given region 113 /// @brief Invalidate pages in a given region
109 /// @pre invalidation_mutex is locked 114 /// @pre invalidation_mutex is locked
110 void InvalidatePagesInRegion(VAddr addr, std::size_t size) { 115 void InvalidatePagesInRegion(VAddr addr, size_t size);
111 const VAddr addr_end = addr + size;
112 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
113 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
114 auto it = invalidation_cache.find(page);
115 if (it == invalidation_cache.end()) {
116 continue;
117 }
118 InvalidatePageEntries(it->second, addr, addr_end);
119 }
120 }
121 116
122 /// @brief Remove shaders marked for deletion 117 /// @brief Remove shaders marked for deletion
123 /// @pre invalidation_mutex is locked 118 /// @pre invalidation_mutex is locked
124 void RemovePendingShaders() { 119 void RemovePendingShaders();
125 if (marked_for_removal.empty()) {
126 return;
127 }
128 // Remove duplicates
129 std::sort(marked_for_removal.begin(), marked_for_removal.end());
130 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
131 marked_for_removal.end());
132
133 std::vector<T*> removed_shaders;
134 removed_shaders.reserve(marked_for_removal.size());
135
136 std::scoped_lock lock{lookup_mutex};
137
138 for (Entry* const entry : marked_for_removal) {
139 removed_shaders.push_back(entry->data);
140
141 const auto it = lookup_cache.find(entry->addr_start);
142 ASSERT(it != lookup_cache.end());
143 lookup_cache.erase(it);
144 }
145 marked_for_removal.clear();
146
147 if (!removed_shaders.empty()) {
148 RemoveShadersFromStorage(std::move(removed_shaders));
149 }
150 }
151 120
152 /// @brief Invalidates entries in a given range for the passed page 121 /// @brief Invalidates entries in a given range for the passed page
153 /// @param entries Vector of entries in the page, it will be modified on overlaps 122 /// @param entries Vector of entries in the page, it will be modified on overlaps
154 /// @param addr Start address of the invalidation 123 /// @param addr Start address of the invalidation
155 /// @param addr_end Non-inclusive end address of the invalidation 124 /// @param addr_end Non-inclusive end address of the invalidation
156 /// @pre invalidation_mutex is locked 125 /// @pre invalidation_mutex is locked
157 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { 126 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end);
158 std::size_t index = 0;
159 while (index < entries.size()) {
160 Entry* const entry = entries[index];
161 if (!entry->Overlaps(addr, addr_end)) {
162 ++index;
163 continue;
164 }
165
166 UnmarkMemory(entry);
167 RemoveEntryFromInvalidationCache(entry);
168 marked_for_removal.push_back(entry);
169 }
170 }
171 127
172 /// @brief Removes all references to an entry in the invalidation cache 128 /// @brief Removes all references to an entry in the invalidation cache
173 /// @param entry Entry to remove from the invalidation cache 129 /// @param entry Entry to remove from the invalidation cache
174 /// @pre invalidation_mutex is locked 130 /// @pre invalidation_mutex is locked
175 void RemoveEntryFromInvalidationCache(const Entry* entry) { 131 void RemoveEntryFromInvalidationCache(const Entry* entry);
176 const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
177 for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
178 const auto entries_it = invalidation_cache.find(page);
179 ASSERT(entries_it != invalidation_cache.end());
180 std::vector<Entry*>& entries = entries_it->second;
181
182 const auto entry_it = std::find(entries.begin(), entries.end(), entry);
183 ASSERT(entry_it != entries.end());
184 entries.erase(entry_it);
185 }
186 }
187 132
188 /// @brief Unmarks an entry from the rasterizer cache 133 /// @brief Unmarks an entry from the rasterizer cache
189 /// @param entry Entry to unmark from memory 134 /// @param entry Entry to unmark from memory
190 void UnmarkMemory(Entry* entry) { 135 void UnmarkMemory(Entry* entry);
191 if (!entry->is_memory_marked) {
192 return;
193 }
194 entry->is_memory_marked = false;
195
196 const VAddr addr = entry->addr_start;
197 const std::size_t size = entry->addr_end - addr;
198 rasterizer.UpdatePagesCachedCount(addr, size, -1);
199 }
200 136
201 /// @brief Removes a vector of shaders from a list 137 /// @brief Removes a vector of shaders from a list
202 /// @param removed_shaders Shaders to be removed from the storage 138 /// @param removed_shaders Shaders to be removed from the storage
203 /// @pre invalidation_mutex is locked 139 /// @pre invalidation_mutex is locked
204 /// @pre lookup_mutex is locked 140 /// @pre lookup_mutex is locked
205 void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { 141 void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders);
206 // Notify removals
207 for (T* const shader : removed_shaders) {
208 OnShaderRemoval(shader);
209 }
210
211 // Remove them from the cache
212 const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
213 return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
214 removed_shaders.end();
215 };
216 std::erase_if(storage, is_removed);
217 }
218 142
219 /// @brief Creates a new entry in the lookup cache and returns its pointer 143 /// @brief Creates a new entry in the lookup cache and returns its pointer
220 /// @pre lookup_mutex is locked 144 /// @pre lookup_mutex is locked
221 Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { 145 Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data);
222 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
223 Entry* const entry_pointer = entry.get();
224 146
225 lookup_cache.emplace(addr, std::move(entry)); 147 /// @brief Create a new shader entry and register it
226 return entry_pointer; 148 const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
227 }
228 149
229 VideoCore::RasterizerInterface& rasterizer; 150 VideoCore::RasterizerInterface& rasterizer;
230 151
@@ -233,7 +154,7 @@ private:
233 154
234 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; 155 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
235 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; 156 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
236 std::vector<std::unique_ptr<T>> storage; 157 std::vector<std::unique_ptr<ShaderInfo>> storage;
237 std::vector<Entry*> marked_for_removal; 158 std::vector<Entry*> marked_for_removal;
238}; 159};
239 160
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
new file mode 100644
index 000000000..8a4581c19
--- /dev/null
+++ b/src/video_core/shader_environment.cpp
@@ -0,0 +1,460 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <filesystem>
6#include <fstream>
7#include <memory>
8#include <optional>
9#include <utility>
10
11#include "common/assert.h"
12#include "common/cityhash.h"
13#include "common/common_types.h"
14#include "common/div_ceil.h"
15#include "common/fs/fs.h"
16#include "common/logging/log.h"
17#include "shader_recompiler/environment.h"
18#include "video_core/memory_manager.h"
19#include "video_core/shader_environment.h"
20#include "video_core/textures/texture.h"
21
22namespace VideoCommon {
23
24constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
25
26constexpr size_t INST_SIZE = sizeof(u64);
27
28using Maxwell = Tegra::Engines::Maxwell3D::Regs;
29
30static u64 MakeCbufKey(u32 index, u32 offset) {
31 return (static_cast<u64>(index) << 32) | offset;
32}
33
34static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
35 switch (entry.texture_type) {
36 case Tegra::Texture::TextureType::Texture1D:
37 return Shader::TextureType::Color1D;
38 case Tegra::Texture::TextureType::Texture2D:
39 case Tegra::Texture::TextureType::Texture2DNoMipmap:
40 return Shader::TextureType::Color2D;
41 case Tegra::Texture::TextureType::Texture3D:
42 return Shader::TextureType::Color3D;
43 case Tegra::Texture::TextureType::TextureCubemap:
44 return Shader::TextureType::ColorCube;
45 case Tegra::Texture::TextureType::Texture1DArray:
46 return Shader::TextureType::ColorArray1D;
47 case Tegra::Texture::TextureType::Texture2DArray:
48 return Shader::TextureType::ColorArray2D;
49 case Tegra::Texture::TextureType::Texture1DBuffer:
50 return Shader::TextureType::Buffer;
51 case Tegra::Texture::TextureType::TextureCubeArray:
52 return Shader::TextureType::ColorArrayCube;
53 default:
54 throw Shader::NotImplementedException("Unknown texture type");
55 }
56}
57
58GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
59 u32 start_address_)
60 : gpu_memory{&gpu_memory_}, program_base{program_base_} {
61 start_address = start_address_;
62}
63
64GenericEnvironment::~GenericEnvironment() = default;
65
66u32 GenericEnvironment::TextureBoundBuffer() const {
67 return texture_bound;
68}
69
70u32 GenericEnvironment::LocalMemorySize() const {
71 return local_memory_size;
72}
73
74u32 GenericEnvironment::SharedMemorySize() const {
75 return shared_memory_size;
76}
77
78std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
79 return workgroup_size;
80}
81
82u64 GenericEnvironment::ReadInstruction(u32 address) {
83 read_lowest = std::min(read_lowest, address);
84 read_highest = std::max(read_highest, address);
85
86 if (address >= cached_lowest && address < cached_highest) {
87 return code[(address - cached_lowest) / INST_SIZE];
88 }
89 has_unbound_instructions = true;
90 return gpu_memory->Read<u64>(program_base + address);
91}
92
93std::optional<u64> GenericEnvironment::Analyze() {
94 const std::optional<u64> size{TryFindSize()};
95 if (!size) {
96 return std::nullopt;
97 }
98 cached_lowest = start_address;
99 cached_highest = start_address + static_cast<u32>(*size);
100 return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
101}
102
103void GenericEnvironment::SetCachedSize(size_t size_bytes) {
104 cached_lowest = start_address;
105 cached_highest = start_address + static_cast<u32>(size_bytes);
106 code.resize(CachedSize());
107 gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
108}
109
110size_t GenericEnvironment::CachedSize() const noexcept {
111 return cached_highest - cached_lowest + INST_SIZE;
112}
113
114size_t GenericEnvironment::ReadSize() const noexcept {
115 return read_highest - read_lowest + INST_SIZE;
116}
117
118bool GenericEnvironment::CanBeSerialized() const noexcept {
119 return !has_unbound_instructions;
120}
121
122u64 GenericEnvironment::CalculateHash() const {
123 const size_t size{ReadSize()};
124 const auto data{std::make_unique<char[]>(size)};
125 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
126 return Common::CityHash64(data.get(), size);
127}
128
129void GenericEnvironment::Serialize(std::ofstream& file) const {
130 const u64 code_size{static_cast<u64>(CachedSize())};
131 const u64 num_texture_types{static_cast<u64>(texture_types.size())};
132 const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
133
134 file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
135 .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
136 .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
137 .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
138 .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
139 .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
140 .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
141 .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
142 .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
143 .write(reinterpret_cast<const char*>(code.data()), code_size);
144 for (const auto [key, type] : texture_types) {
145 file.write(reinterpret_cast<const char*>(&key), sizeof(key))
146 .write(reinterpret_cast<const char*>(&type), sizeof(type));
147 }
148 for (const auto [key, type] : cbuf_values) {
149 file.write(reinterpret_cast<const char*>(&key), sizeof(key))
150 .write(reinterpret_cast<const char*>(&type), sizeof(type));
151 }
152 if (stage == Shader::Stage::Compute) {
153 file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
154 .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
155 } else {
156 file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
157 if (stage == Shader::Stage::Geometry) {
158 file.write(reinterpret_cast<const char*>(&gp_passthrough_mask),
159 sizeof(gp_passthrough_mask));
160 }
161 }
162}
163
164std::optional<u64> GenericEnvironment::TryFindSize() {
165 static constexpr size_t BLOCK_SIZE = 0x1000;
166 static constexpr size_t MAXIMUM_SIZE = 0x100000;
167
168 static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
169 static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
170
171 GPUVAddr guest_addr{program_base + start_address};
172 size_t offset{0};
173 size_t size{BLOCK_SIZE};
174 while (size <= MAXIMUM_SIZE) {
175 code.resize(size / INST_SIZE);
176 u64* const data = code.data() + offset / INST_SIZE;
177 gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
178 for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
179 const u64 inst = data[index / INST_SIZE];
180 if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
181 return offset + index;
182 }
183 }
184 guest_addr += BLOCK_SIZE;
185 size += BLOCK_SIZE;
186 offset += BLOCK_SIZE;
187 }
188 return std::nullopt;
189}
190
191Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
192 bool via_header_index, u32 raw) {
193 const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
194 const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
195 Tegra::Texture::TICEntry entry;
196 gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
197 const Shader::TextureType result{ConvertType(entry)};
198 texture_types.emplace(raw, result);
199 return result;
200}
201
202GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
203 Tegra::MemoryManager& gpu_memory_,
204 Maxwell::ShaderProgram program, GPUVAddr program_base_,
205 u32 start_address_)
206 : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
207 gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
208 gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask;
209 switch (program) {
210 case Maxwell::ShaderProgram::VertexA:
211 stage = Shader::Stage::VertexA;
212 stage_index = 0;
213 break;
214 case Maxwell::ShaderProgram::VertexB:
215 stage = Shader::Stage::VertexB;
216 stage_index = 0;
217 break;
218 case Maxwell::ShaderProgram::TesselationControl:
219 stage = Shader::Stage::TessellationControl;
220 stage_index = 1;
221 break;
222 case Maxwell::ShaderProgram::TesselationEval:
223 stage = Shader::Stage::TessellationEval;
224 stage_index = 2;
225 break;
226 case Maxwell::ShaderProgram::Geometry:
227 stage = Shader::Stage::Geometry;
228 stage_index = 3;
229 break;
230 case Maxwell::ShaderProgram::Fragment:
231 stage = Shader::Stage::Fragment;
232 stage_index = 4;
233 break;
234 default:
235 UNREACHABLE_MSG("Invalid program={}", program);
236 break;
237 }
238 const u64 local_size{sph.LocalMemorySize()};
239 ASSERT(local_size <= std::numeric_limits<u32>::max());
240 local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
241 texture_bound = maxwell3d->regs.tex_cb_index;
242}
243
244u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
245 const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
246 ASSERT(cbuf.enabled);
247 u32 value{};
248 if (cbuf_offset < cbuf.size) {
249 value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
250 }
251 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
252 return value;
253}
254
255Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
256 const auto& regs{maxwell3d->regs};
257 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
258 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
259}
260
261ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
262 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
263 u32 start_address_)
264 : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
265 &kepler_compute_} {
266 const auto& qmd{kepler_compute->launch_description};
267 stage = Shader::Stage::Compute;
268 local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc;
269 texture_bound = kepler_compute->regs.tex_cb_index;
270 shared_memory_size = qmd.shared_alloc;
271 workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
272}
273
274u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
275 const auto& qmd{kepler_compute->launch_description};
276 ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
277 const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
278 u32 value{};
279 if (cbuf_offset < cbuf.size) {
280 value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
281 }
282 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
283 return value;
284}
285
286Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
287 const auto& regs{kepler_compute->regs};
288 const auto& qmd{kepler_compute->launch_description};
289 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
290}
291
292void FileEnvironment::Deserialize(std::ifstream& file) {
293 u64 code_size{};
294 u64 num_texture_types{};
295 u64 num_cbuf_values{};
296 file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
297 .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
298 .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
299 .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
300 .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
301 .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
302 .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
303 .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
304 .read(reinterpret_cast<char*>(&stage), sizeof(stage));
305 code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
306 file.read(reinterpret_cast<char*>(code.get()), code_size);
307 for (size_t i = 0; i < num_texture_types; ++i) {
308 u32 key;
309 Shader::TextureType type;
310 file.read(reinterpret_cast<char*>(&key), sizeof(key))
311 .read(reinterpret_cast<char*>(&type), sizeof(type));
312 texture_types.emplace(key, type);
313 }
314 for (size_t i = 0; i < num_cbuf_values; ++i) {
315 u64 key;
316 u32 value;
317 file.read(reinterpret_cast<char*>(&key), sizeof(key))
318 .read(reinterpret_cast<char*>(&value), sizeof(value));
319 cbuf_values.emplace(key, value);
320 }
321 if (stage == Shader::Stage::Compute) {
322 file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
323 .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
324 } else {
325 file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
326 if (stage == Shader::Stage::Geometry) {
327 file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
328 }
329 }
330}
331
332u64 FileEnvironment::ReadInstruction(u32 address) {
333 if (address < read_lowest || address > read_highest) {
334 throw Shader::LogicError("Out of bounds address {}", address);
335 }
336 return code[(address - read_lowest) / sizeof(u64)];
337}
338
339u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
340 const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
341 if (it == cbuf_values.end()) {
342 throw Shader::LogicError("Uncached constant buffer read");
343 }
344 return it->second;
345}
346
347Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
348 const auto it{texture_types.find(handle)};
349 if (it == texture_types.end()) {
350 throw Shader::LogicError("Uncached read texture type");
351 }
352 return it->second;
353}
354
355u32 FileEnvironment::LocalMemorySize() const {
356 return local_memory_size;
357}
358
359u32 FileEnvironment::SharedMemorySize() const {
360 return shared_memory_size;
361}
362
363u32 FileEnvironment::TextureBoundBuffer() const {
364 return texture_bound;
365}
366
367std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
368 return workgroup_size;
369}
370
371void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
372 const std::filesystem::path& filename, u32 cache_version) try {
373 std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
374 file.exceptions(std::ifstream::failbit);
375 if (!file.is_open()) {
376 LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
377 Common::FS::PathToUTF8String(filename));
378 return;
379 }
380 if (file.tellp() == 0) {
381 // Write header
382 file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
383 .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
384 }
385 if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
386 return;
387 }
388 const u32 num_envs{static_cast<u32>(envs.size())};
389 file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
390 for (const GenericEnvironment* const env : envs) {
391 env->Serialize(file);
392 }
393 file.write(key.data(), key.size_bytes());
394
395} catch (const std::ios_base::failure& e) {
396 LOG_ERROR(Common_Filesystem, "{}", e.what());
397 if (!Common::FS::RemoveFile(filename)) {
398 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
399 Common::FS::PathToUTF8String(filename));
400 }
401}
402
403void LoadPipelines(
404 std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
405 Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
406 Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
407 std::ifstream file(filename, std::ios::binary | std::ios::ate);
408 if (!file.is_open()) {
409 return;
410 }
411 file.exceptions(std::ifstream::failbit);
412 const auto end{file.tellg()};
413 file.seekg(0, std::ios::beg);
414
415 std::array<char, 8> magic_number;
416 u32 cache_version;
417 file.read(magic_number.data(), magic_number.size())
418 .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
419 if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) {
420 file.close();
421 if (Common::FS::RemoveFile(filename)) {
422 if (magic_number != MAGIC_NUMBER) {
423 LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
424 }
425 if (cache_version != expected_cache_version) {
426 LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
427 }
428 } else {
429 LOG_ERROR(Common_Filesystem,
430 "Invalid pipeline cache file and failed to delete it in \"{}\"",
431 Common::FS::PathToUTF8String(filename));
432 }
433 return;
434 }
435 while (file.tellg() != end) {
436 if (stop_loading.stop_requested()) {
437 return;
438 }
439 u32 num_envs{};
440 file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
441 std::vector<FileEnvironment> envs(num_envs);
442 for (FileEnvironment& env : envs) {
443 env.Deserialize(file);
444 }
445 if (envs.front().ShaderStage() == Shader::Stage::Compute) {
446 load_compute(file, std::move(envs.front()));
447 } else {
448 load_graphics(file, std::move(envs));
449 }
450 }
451
452} catch (const std::ios_base::failure& e) {
453 LOG_ERROR(Common_Filesystem, "{}", e.what());
454 if (!Common::FS::RemoveFile(filename)) {
455 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
456 Common::FS::PathToUTF8String(filename));
457 }
458}
459
460} // namespace VideoCommon
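
Constant-buffer reads above are memoized under a key that packs the buffer index into the high 32 bits and the byte offset into the low 32 bits. A small self-contained sketch of that packing follows; the UnpackCbufKey helper is illustrative only and is not part of the file.

#include <cstdint>
#include <utility>

// Same packing as MakeCbufKey above.
static std::uint64_t MakeCbufKey(std::uint32_t index, std::uint32_t offset) {
    return (static_cast<std::uint64_t>(index) << 32) | offset;
}

// Illustrative inverse, included for clarity only.
static std::pair<std::uint32_t, std::uint32_t> UnpackCbufKey(std::uint64_t key) {
    return {static_cast<std::uint32_t>(key >> 32), static_cast<std::uint32_t>(key)};
}

int main() {
    const std::uint64_t key = MakeCbufKey(3, 0x120);
    const auto [index, offset] = UnpackCbufKey(key);
    return (index == 3 && offset == 0x120) ? 0 : 1;
}
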
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
new file mode 100644
index 000000000..2079979db
--- /dev/null
+++ b/src/video_core/shader_environment.h
@@ -0,0 +1,183 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <filesystem>
10#include <iosfwd>
11#include <limits>
12#include <memory>
13#include <optional>
14#include <span>
15#include <type_traits>
16#include <unordered_map>
17#include <vector>
18
19#include "common/common_types.h"
20#include "common/unique_function.h"
21#include "shader_recompiler/environment.h"
22#include "video_core/engines/kepler_compute.h"
23#include "video_core/engines/maxwell_3d.h"
24#include "video_core/textures/texture.h"
25
26namespace Tegra {
27class MemoryManager;
28}
29
30namespace VideoCommon {
31
32class GenericEnvironment : public Shader::Environment {
33public:
34 explicit GenericEnvironment() = default;
35 explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
36 u32 start_address_);
37
38 ~GenericEnvironment() override;
39
40 [[nodiscard]] u32 TextureBoundBuffer() const final;
41
42 [[nodiscard]] u32 LocalMemorySize() const final;
43
44 [[nodiscard]] u32 SharedMemorySize() const final;
45
46 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final;
47
48 [[nodiscard]] u64 ReadInstruction(u32 address) final;
49
50 [[nodiscard]] std::optional<u64> Analyze();
51
52 void SetCachedSize(size_t size_bytes);
53
54 [[nodiscard]] size_t CachedSize() const noexcept;
55
56 [[nodiscard]] size_t ReadSize() const noexcept;
57
58 [[nodiscard]] bool CanBeSerialized() const noexcept;
59
60 [[nodiscard]] u64 CalculateHash() const;
61
62 void Serialize(std::ofstream& file) const;
63
64protected:
65 std::optional<u64> TryFindSize();
66
67 Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
68 u32 raw);
69
70 Tegra::MemoryManager* gpu_memory{};
71 GPUVAddr program_base{};
72
73 std::vector<u64> code;
74 std::unordered_map<u32, Shader::TextureType> texture_types;
75 std::unordered_map<u64, u32> cbuf_values;
76
77 u32 local_memory_size{};
78 u32 texture_bound{};
79 u32 shared_memory_size{};
80 std::array<u32, 3> workgroup_size{};
81
82 u32 read_lowest = std::numeric_limits<u32>::max();
83 u32 read_highest = 0;
84
85 u32 cached_lowest = std::numeric_limits<u32>::max();
86 u32 cached_highest = 0;
87
88 bool has_unbound_instructions = false;
89};
90
91class GraphicsEnvironment final : public GenericEnvironment {
92public:
93 explicit GraphicsEnvironment() = default;
94 explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
95 Tegra::MemoryManager& gpu_memory_,
96 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program,
97 GPUVAddr program_base_, u32 start_address_);
98
99 ~GraphicsEnvironment() override = default;
100
101 u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
102
103 Shader::TextureType ReadTextureType(u32 handle) override;
104
105private:
106 Tegra::Engines::Maxwell3D* maxwell3d{};
107 size_t stage_index{};
108};
109
110class ComputeEnvironment final : public GenericEnvironment {
111public:
112 explicit ComputeEnvironment() = default;
113 explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
114 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
115 u32 start_address_);
116
117 ~ComputeEnvironment() override = default;
118
119 u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
120
121 Shader::TextureType ReadTextureType(u32 handle) override;
122
123private:
124 Tegra::Engines::KeplerCompute* kepler_compute{};
125};
126
127class FileEnvironment final : public Shader::Environment {
128public:
129 FileEnvironment() = default;
130 ~FileEnvironment() override = default;
131
132 FileEnvironment& operator=(FileEnvironment&&) noexcept = default;
133 FileEnvironment(FileEnvironment&&) noexcept = default;
134
135 FileEnvironment& operator=(const FileEnvironment&) = delete;
136 FileEnvironment(const FileEnvironment&) = delete;
137
138 void Deserialize(std::ifstream& file);
139
140 [[nodiscard]] u64 ReadInstruction(u32 address) override;
141
142 [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
143
144 [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
145
146 [[nodiscard]] u32 LocalMemorySize() const override;
147
148 [[nodiscard]] u32 SharedMemorySize() const override;
149
150 [[nodiscard]] u32 TextureBoundBuffer() const override;
151
152 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
153
154private:
155 std::unique_ptr<u64[]> code;
156 std::unordered_map<u32, Shader::TextureType> texture_types;
157 std::unordered_map<u64, u32> cbuf_values;
158 std::array<u32, 3> workgroup_size{};
159 u32 local_memory_size{};
160 u32 shared_memory_size{};
161 u32 texture_bound{};
162 u32 read_lowest{};
163 u32 read_highest{};
164};
165
166void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
167 const std::filesystem::path& filename, u32 cache_version);
168
169template <typename Key, typename Envs>
170void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename,
171 u32 cache_version) {
172 static_assert(std::is_trivially_copyable_v<Key>);
173 static_assert(std::has_unique_object_representations_v<Key>);
174 SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)),
175 std::span(envs.data(), envs.size()), filename, cache_version);
176}
177
178void LoadPipelines(
179 std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
180 Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
181 Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics);
182
183} // namespace VideoCommon
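
The templated SerializePipeline overload above static_asserts that the key is trivially copyable and has unique object representations, so it can be written byte-for-byte. A sketch of a key type that satisfies both constraints; the struct and its fields are purely illustrative.

#include <cstdint>
#include <type_traits>

struct ExamplePipelineKey {
    std::uint64_t vertex_hash;
    std::uint64_t fragment_hash;
    std::uint32_t state_bits;
    std::uint32_t reserved; // explicit field instead of implicit padding
};
static_assert(std::is_trivially_copyable_v<ExamplePipelineKey>);
static_assert(std::has_unique_object_representations_v<ExamplePipelineKey>);
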
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp
index 693e47158..dc6995b46 100644
--- a/src/video_core/shader_notify.cpp
+++ b/src/video_core/shader_notify.cpp
@@ -2,42 +2,35 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex> 5#include <atomic>
6#include <chrono>
7#include <optional>
8
6#include "video_core/shader_notify.h" 9#include "video_core/shader_notify.h"
7 10
8using namespace std::chrono_literals; 11using namespace std::chrono_literals;
9 12
10namespace VideoCore { 13namespace VideoCore {
11namespace {
12constexpr auto UPDATE_TICK = 32ms;
13}
14
15ShaderNotify::ShaderNotify() = default;
16ShaderNotify::~ShaderNotify() = default;
17 14
18std::size_t ShaderNotify::GetShadersBuilding() { 15const auto TIME_TO_STOP_REPORTING = 2s;
19 const auto now = std::chrono::high_resolution_clock::now(); 16
20 const auto diff = now - last_update; 17int ShaderNotify::ShadersBuilding() noexcept {
21 if (diff > UPDATE_TICK) { 18 const int now_complete = num_complete.load(std::memory_order::relaxed);
22 std::shared_lock lock(mutex); 19 const int now_building = num_building.load(std::memory_order::relaxed);
23 last_updated_count = accurate_count; 20 if (now_complete == now_building) {
21 const auto now = std::chrono::high_resolution_clock::now();
22 if (completed && num_complete == num_when_completed) {
23 if (now - complete_time > TIME_TO_STOP_REPORTING) {
24 report_base = now_complete;
25 completed = false;
26 }
27 } else {
28 completed = true;
29 num_when_completed = num_complete;
30 complete_time = now;
31 }
24 } 32 }
25 return last_updated_count; 33 return now_building - report_base;
26}
27
28std::size_t ShaderNotify::GetShadersBuildingAccurate() {
29 std::shared_lock lock{mutex};
30 return accurate_count;
31}
32
33void ShaderNotify::MarkShaderComplete() {
34 std::unique_lock lock{mutex};
35 accurate_count--;
36}
37
38void ShaderNotify::MarkSharderBuilding() {
39 std::unique_lock lock{mutex};
40 accurate_count++;
41} 34}
42 35
43} // namespace VideoCore 36} // namespace VideoCore
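
The rewritten notifier counts work with two atomics: one is incremented when a shader starts building, the other when it finishes, and the difference is what gets reported. A reduced sketch of that scheme, without the time-based reporting hysteresis the real class adds:

#include <atomic>

class BuildCounterSketch {
public:
    void MarkShaderBuilding() noexcept { ++num_building; }
    void MarkShaderComplete() noexcept { ++num_complete; }

    // Shaders still compiling right now.
    int ShadersBuilding() const noexcept {
        return num_building.load(std::memory_order_relaxed) -
               num_complete.load(std::memory_order_relaxed);
    }

private:
    std::atomic_int num_building{};
    std::atomic_int num_complete{};
};
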
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h
index a9c92d179..ad363bfb5 100644
--- a/src/video_core/shader_notify.h
+++ b/src/video_core/shader_notify.h
@@ -4,26 +4,30 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
7#include <chrono> 8#include <chrono>
8#include <shared_mutex> 9#include <optional>
9#include "common/common_types.h"
10 10
11namespace VideoCore { 11namespace VideoCore {
12class ShaderNotify { 12class ShaderNotify {
13public: 13public:
14 ShaderNotify(); 14 [[nodiscard]] int ShadersBuilding() noexcept;
15 ~ShaderNotify();
16 15
17 std::size_t GetShadersBuilding(); 16 void MarkShaderComplete() noexcept {
18 std::size_t GetShadersBuildingAccurate(); 17 ++num_complete;
18 }
19 19
20 void MarkShaderComplete(); 20 void MarkShaderBuilding() noexcept {
21 void MarkSharderBuilding(); 21 ++num_building;
22 }
22 23
23private: 24private:
24 std::size_t last_updated_count{}; 25 std::atomic_int num_building{};
25 std::size_t accurate_count{}; 26 std::atomic_int num_complete{};
26 std::shared_mutex mutex; 27 int report_base{};
27 std::chrono::high_resolution_clock::time_point last_update{}; 28
29 bool completed{};
30 int num_when_completed{};
31 std::chrono::high_resolution_clock::time_point complete_time;
28}; 32};
29} // namespace VideoCore 33} // namespace VideoCore
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index d10ba4ccd..249cc4d0f 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) {
43 return "Invalid"; 43 return "Invalid";
44} 44}
45 45
46std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { 46std::string Name(const ImageViewBase& image_view) {
47 const u32 width = image_view.size.width; 47 const u32 width = image_view.size.width;
48 const u32 height = image_view.size.height; 48 const u32 height = image_view.size.height;
49 const u32 depth = image_view.size.depth; 49 const u32 depth = image_view.size.depth;
@@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t
51 const u32 num_layers = image_view.range.extent.layers; 51 const u32 num_layers = image_view.range.extent.layers;
52 52
53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; 53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
54 switch (type.value_or(image_view.type)) { 54 switch (image_view.type) {
55 case ImageViewType::e1D: 55 case ImageViewType::e1D:
56 return fmt::format("ImageView 1D {}{}", width, level); 56 return fmt::format("ImageView 1D {}{}", width, level);
57 case ImageViewType::e2D: 57 case ImageViewType::e2D:
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index a48413983..c6cf0583f 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -255,8 +255,7 @@ struct RenderTargets;
255 255
256[[nodiscard]] std::string Name(const ImageBase& image); 256[[nodiscard]] std::string Name(const ImageBase& image);
257 257
258[[nodiscard]] std::string Name(const ImageViewBase& image_view, 258[[nodiscard]] std::string Name(const ImageViewBase& image_view);
259 std::optional<ImageViewType> type = std::nullopt);
260 259
261[[nodiscard]] std::string Name(const RenderTargets& render_targets); 260[[nodiscard]] std::string Name(const RenderTargets& render_targets);
262 261
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index e8d632f9e..450becbeb 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
36 } 36 }
37} 37}
38 38
39ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info)
40 : format{info.format}, type{ImageViewType::Buffer}, size{
41 .width = info.size.width,
42 .height = 1,
43 .depth = 1,
44 } {
45 ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer");
46}
47
39ImageViewBase::ImageViewBase(const NullImageParams&) {} 48ImageViewBase::ImageViewBase(const NullImageParams&) {}
40 49
41} // namespace VideoCommon 50} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index 73954167e..903f715c5 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
27struct ImageViewBase { 27struct ImageViewBase {
28 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, 28 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
29 ImageId image_id); 29 ImageId image_id);
30 explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info);
30 explicit ImageViewBase(const NullImageParams&); 31 explicit ImageViewBase(const NullImageParams&);
31 32
32 [[nodiscard]] bool IsBuffer() const noexcept { 33 [[nodiscard]] bool IsBuffer() const noexcept {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 85ce06d56..f34c9d9ca 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -117,6 +117,9 @@ public:
117 /// Return a reference to the given image view id 117 /// Return a reference to the given image view id
118 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; 118 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
119 119
120 /// Mark an image as modified from the GPU
121 void MarkModification(ImageId id) noexcept;
122
120 /// Fill image_view_ids with the graphics images in indices 123 /// Fill image_view_ids with the graphics images in indices
121 void FillGraphicsImageViews(std::span<const u32> indices, 124 void FillGraphicsImageViews(std::span<const u32> indices,
122 std::span<ImageViewId> image_view_ids); 125 std::span<ImageViewId> image_view_ids);
@@ -527,6 +530,11 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
527} 530}
528 531
529template <class P> 532template <class P>
533void TextureCache<P>::MarkModification(ImageId id) noexcept {
534 MarkModification(slot_images[id]);
535}
536
537template <class P>
530void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, 538void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
531 std::span<ImageViewId> image_view_ids) { 539 std::span<ImageViewId> image_view_ids) {
532 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); 540 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
@@ -540,13 +548,13 @@ void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
540 548
541template <class P> 549template <class P>
542typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { 550typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
543 [[unlikely]] if (index > graphics_sampler_table.Limit()) { 551 if (index > graphics_sampler_table.Limit()) {
544 LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); 552 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
545 return &slot_samplers[NULL_SAMPLER_ID]; 553 return &slot_samplers[NULL_SAMPLER_ID];
546 } 554 }
547 const auto [descriptor, is_new] = graphics_sampler_table.Read(index); 555 const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
548 SamplerId& id = graphics_sampler_ids[index]; 556 SamplerId& id = graphics_sampler_ids[index];
549 [[unlikely]] if (is_new) { 557 if (is_new) {
550 id = FindSampler(descriptor); 558 id = FindSampler(descriptor);
551 } 559 }
552 return &slot_samplers[id]; 560 return &slot_samplers[id];
@@ -554,13 +562,13 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
554 562
555template <class P> 563template <class P>
556typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { 564typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
557 [[unlikely]] if (index > compute_sampler_table.Limit()) { 565 if (index > compute_sampler_table.Limit()) {
558 LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); 566 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
559 return &slot_samplers[NULL_SAMPLER_ID]; 567 return &slot_samplers[NULL_SAMPLER_ID];
560 } 568 }
561 const auto [descriptor, is_new] = compute_sampler_table.Read(index); 569 const auto [descriptor, is_new] = compute_sampler_table.Read(index);
562 SamplerId& id = compute_sampler_ids[index]; 570 SamplerId& id = compute_sampler_ids[index];
563 [[unlikely]] if (is_new) { 571 if (is_new) {
564 id = FindSampler(descriptor); 572 id = FindSampler(descriptor);
565 } 573 }
566 return &slot_samplers[id]; 574 return &slot_samplers[id];
@@ -661,7 +669,7 @@ ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
661 std::span<ImageViewId> cached_image_view_ids, 669 std::span<ImageViewId> cached_image_view_ids,
662 u32 index) { 670 u32 index) {
663 if (index > table.Limit()) { 671 if (index > table.Limit()) {
664 LOG_ERROR(HW_GPU, "Invalid image view index={}", index); 672 LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
665 return NULL_IMAGE_VIEW_ID; 673 return NULL_IMAGE_VIEW_ID;
666 } 674 }
667 const auto [descriptor, is_new] = table.Read(index); 675 const auto [descriptor, is_new] = table.Read(index);
@@ -968,9 +976,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
968 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); 976 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
969 ConvertImage(unswizzled_data, image.info, mapped_span, copies); 977 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
970 image.UploadMemory(staging, copies); 978 image.UploadMemory(staging, copies);
971 } else if (image.info.type == ImageType::Buffer) {
972 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
973 image.UploadMemory(staging, copies);
974 } else { 979 } else {
975 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); 980 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
976 image.UploadMemory(staging, copies); 981 image.UploadMemory(staging, copies);
@@ -993,7 +998,12 @@ ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
993template <class P> 998template <class P>
994ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { 999ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
995 const ImageInfo info(config); 1000 const ImageInfo info(config);
996 const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; 1001 if (info.type == ImageType::Buffer) {
1002 const ImageViewInfo view_info(config, 0);
1003 return slot_image_views.insert(runtime, info, view_info, config.Address());
1004 }
1005 const u32 layer_offset = config.BaseLayer() * info.layer_stride;
1006 const GPUVAddr image_gpu_addr = config.Address() - layer_offset;
997 const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); 1007 const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
998 if (!image_id) { 1008 if (!image_id) {
999 return NULL_IMAGE_VIEW_ID; 1009 return NULL_IMAGE_VIEW_ID;
@@ -1801,6 +1811,9 @@ void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modifi
1801 return; 1811 return;
1802 } 1812 }
1803 const ImageViewBase& image_view = slot_image_views[image_view_id]; 1813 const ImageViewBase& image_view = slot_image_views[image_view_id];
1814 if (image_view.IsBuffer()) {
1815 return;
1816 }
1804 PrepareImage(image_view.image_id, is_modification, invalidate); 1817 PrepareImage(image_view.image_id, is_modification, invalidate);
1805} 1818}
1806 1819
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c1d14335e..1a9399455 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -154,6 +154,15 @@ union TextureHandle {
154}; 154};
155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); 155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
156 156
157[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
158 if (via_header_index) {
159 return {raw, raw};
160 } else {
161 const Tegra::Texture::TextureHandle handle{raw};
162 return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id};
163 }
164}
165
157struct TICEntry { 166struct TICEntry {
158 union { 167 union {
159 struct { 168 struct {
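TexturePair above splits one raw handle word into a texture (TIC) index and a sampler (TSC) index; when samplers are addressed via the header index, the raw value is returned for both halves. A standalone restatement of the unpacking, assuming the 20-bit TIC / 12-bit TSC split used by TextureHandle:

    #include <cstdint>
    #include <utility>

    using u32 = std::uint32_t;

    // Minimal sketch of TexturePair; the bit positions are assumed from TextureHandle.
    std::pair<u32, u32> TexturePairSketch(u32 raw, bool via_header_index) {
        if (via_header_index) {
            return {raw, raw}; // header-indexed samplers reuse the raw value for both tables
        }
        const u32 tic_id = raw & 0xfffffu;       // bits 0..19: texture descriptor index
        const u32 tsc_id = (raw >> 20) & 0xfffu; // bits 20..31: sampler descriptor index
        return {tic_id, tsc_id};
    }

Note that the ternary on the non-header path of the committed code always selects tsc_id, since via_header_index is known to be false in that branch.
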
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
new file mode 100644
index 000000000..ba26ac3f1
--- /dev/null
+++ b/src/video_core/transform_feedback.cpp
@@ -0,0 +1,99 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <vector>
8
9#include "common/alignment.h"
10#include "common/assert.h"
11#include "shader_recompiler/shader_info.h"
12#include "video_core/transform_feedback.h"
13
14namespace VideoCommon {
15
16std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
17 const TransformFeedbackState& state) {
18 static constexpr std::array VECTORS{
19 28, // gl_Position
20 32, // Generic 0
21 36, // Generic 1
22 40, // Generic 2
23 44, // Generic 3
24 48, // Generic 4
25 52, // Generic 5
26 56, // Generic 6
27 60, // Generic 7
28 64, // Generic 8
29 68, // Generic 9
30 72, // Generic 10
31 76, // Generic 11
32 80, // Generic 12
33 84, // Generic 13
34 88, // Generic 14
35 92, // Generic 15
36 96, // Generic 16
37 100, // Generic 17
38 104, // Generic 18
39 108, // Generic 19
40 112, // Generic 20
41 116, // Generic 21
42 120, // Generic 22
43 124, // Generic 23
44 128, // Generic 24
45 132, // Generic 25
46 136, // Generic 26
47 140, // Generic 27
48 144, // Generic 28
49 148, // Generic 29
50 152, // Generic 30
51 156, // Generic 31
52 160, // gl_FrontColor
53 164, // gl_FrontSecondaryColor
54 160, // gl_BackColor
55 164, // gl_BackSecondaryColor
56 192, // gl_TexCoord[0]
57 196, // gl_TexCoord[1]
58 200, // gl_TexCoord[2]
59 204, // gl_TexCoord[3]
60 208, // gl_TexCoord[4]
61 212, // gl_TexCoord[5]
62 216, // gl_TexCoord[6]
63 220, // gl_TexCoord[7]
64 };
65 std::vector<Shader::TransformFeedbackVarying> xfb(256);
66 for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
67 const auto& locations = state.varyings[buffer];
68 const auto& layout = state.layouts[buffer];
69 const u32 varying_count = layout.varying_count;
70 u32 highest = 0;
71 for (u32 offset = 0; offset < varying_count; ++offset) {
72 const u32 base_offset = offset;
73 const u8 location = locations[offset];
74
75 UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream);
76 Shader::TransformFeedbackVarying varying{
77 .buffer = static_cast<u32>(buffer),
78 .stride = layout.stride,
79 .offset = offset * 4,
80 .components = 1,
81 };
82 if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) {
83 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
84
85 const u8 base_index = location / 4;
86 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
87 ++offset;
88 ++varying.components;
89 }
90 }
91 xfb[location] = varying;
92 highest = std::max(highest, (base_offset + varying.components) * 4);
93 }
94 UNIMPLEMENTED_IF(highest != layout.stride);
95 }
96 return xfb;
97}
98
99} // namespace VideoCommon
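MakeTransformFeedbackVaryings walks each buffer's location list and, whenever a location lands on one of the vec4 bases listed in VECTORS, folds the following consecutive components of the same attribute into a single multi-component varying. A standalone sketch of that merge on a toy location list, dropping the VECTORS whitelist and the stream/stride checks; locations 28..31 stand for gl_Position.xyzw and 32..35 for generic attribute 0, as in the table above:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct Varying {
        std::uint32_t offset;     // byte offset of the value inside one buffer record
        std::uint32_t components; // 1..4 consecutive components of the same attribute
    };

    std::vector<Varying> Merge(const std::vector<std::uint8_t>& locations) {
        std::vector<Varying> out;
        for (std::size_t i = 0; i < locations.size(); ++i) {
            Varying v{static_cast<std::uint32_t>(i) * 4, 1};
            const std::uint8_t base = locations[i] / 4;
            while (i + 1 < locations.size() && locations[i + 1] / 4 == base) {
                ++i; // same attribute: absorb the next component into this varying
                ++v.components;
            }
            out.push_back(v);
        }
        return out;
    }

    int main() {
        // gl_Position.xyzw followed by generic 0 .xy: one vec4 varying, then one vec2.
        const std::vector<std::uint8_t> locations{28, 29, 30, 31, 32, 33};
        for (const Varying& v : Merge(locations)) {
            std::cout << "offset=" << v.offset << " components=" << v.components << '\n';
        }
    }
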
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
new file mode 100644
index 000000000..8f6946d65
--- /dev/null
+++ b/src/video_core/transform_feedback.h
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <vector>
9
10#include "common/common_types.h"
11#include "shader_recompiler/runtime_info.h"
12#include "video_core/engines/maxwell_3d.h"
13
14namespace VideoCommon {
15
16struct TransformFeedbackState {
17 struct Layout {
18 u32 stream;
19 u32 varying_count;
20 u32 stride;
21 };
22 std::array<Layout, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> layouts;
23 std::array<std::array<u8, 128>, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
24 varyings;
25};
26
27std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
28 const TransformFeedbackState& state);
29
30} // namespace VideoCommon
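TransformFeedbackState is the engine-agnostic snapshot this API consumes: one Layout plus a 128-entry location list per hardware transform feedback buffer. A minimal fragment showing how a caller could fill a single buffer that streams out gl_Position, assuming the location encoding from transform_feedback.cpp; the values are illustrative, not captured from a real title:

    // Fragment only; requires "video_core/transform_feedback.h".
    VideoCommon::TransformFeedbackState state{};
    state.layouts[0] = {.stream = 0, .varying_count = 4, .stride = 16}; // one vec4 per vertex
    state.varyings[0] = {28, 29, 30, 31};                               // gl_Position.xyzw
    const auto varyings = VideoCommon::MakeTransformFeedbackVaryings(state);
    // varyings[28] now describes a 4-component output in buffer 0 with a 16-byte stride.
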
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index 758c038ba..fdd1a5081 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() {
73 } 73 }
74} 74}
75 75
76void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { 76void NsightAftermathTracker::SaveShader(std::span<const u32> spirv) const {
77 if (!initialized) { 77 if (!initialized) {
78 return; 78 return;
79 } 79 }
80 80 std::vector<u32> spirv_copy(spirv.begin(), spirv.end());
81 std::vector<u32> spirv_copy = spirv;
82 GFSDK_Aftermath_SpirvCode shader; 81 GFSDK_Aftermath_SpirvCode shader;
83 shader.pData = spirv_copy.data(); 82 shader.pData = spirv_copy.data();
84 shader.size = static_cast<u32>(spirv_copy.size() * 4); 83 shader.size = static_cast<u32>(spirv_copy.size() * 4);
@@ -100,7 +99,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
100 LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); 99 LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash);
101 return; 100 return;
102 } 101 }
103 if (file.Write(spirv) != spirv.size()) { 102 if (file.WriteSpan(spirv) != spirv.size()) {
104 LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); 103 LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash);
105 return; 104 return;
106 } 105 }
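Taking std::span<const u32> lets SaveShader accept any contiguous range of SPIR-V words without forcing callers to build a std::vector first; the tracker still makes an internal copy because the Aftermath struct above is filled from a mutable buffer. A small standalone sketch of what the new signature accepts, with a stand-in for the real method:

    #include <array>
    #include <cstdint>
    #include <span>
    #include <vector>

    // Stand-in for Device::SaveShader / NsightAftermathTracker::SaveShader.
    void Save(std::span<const std::uint32_t>) {}

    int main() {
        const std::vector<std::uint32_t> from_vector{0x07230203, 0x00010000};
        const std::array<std::uint32_t, 2> from_array{0x07230203, 0x00010000};
        Save(from_vector);                     // no copy or conversion at the call site
        Save(from_array);                      // std::array works just as well
        Save(std::span{from_array}.first(1));  // or any sub-range
    }
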
diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
index 4fe2b14d9..eae1891dd 100644
--- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
@@ -6,6 +6,7 @@
6 6
7#include <filesystem> 7#include <filesystem>
8#include <mutex> 8#include <mutex>
9#include <span>
9#include <string> 10#include <string>
10#include <vector> 11#include <vector>
11 12
@@ -33,7 +34,7 @@ public:
33 NsightAftermathTracker(NsightAftermathTracker&&) = delete; 34 NsightAftermathTracker(NsightAftermathTracker&&) = delete;
34 NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; 35 NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
35 36
36 void SaveShader(const std::vector<u32>& spirv) const; 37 void SaveShader(std::span<const u32> spirv) const;
37 38
38private: 39private:
39#ifdef HAS_NSIGHT_AFTERMATH 40#ifdef HAS_NSIGHT_AFTERMATH
@@ -61,21 +62,21 @@ private:
61 bool initialized = false; 62 bool initialized = false;
62 63
63 Common::DynamicLibrary dl; 64 Common::DynamicLibrary dl;
64 PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; 65 PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{};
65 PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; 66 PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{};
66 PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; 67 PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier{};
67 PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; 68 PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{};
68 PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; 69 PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{};
69 PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; 70 PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{};
70 PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; 71 PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{};
71 PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; 72 PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{};
72#endif 73#endif
73}; 74};
74 75
75#ifndef HAS_NSIGHT_AFTERMATH 76#ifndef HAS_NSIGHT_AFTERMATH
76inline NsightAftermathTracker::NsightAftermathTracker() = default; 77inline NsightAftermathTracker::NsightAftermathTracker() = default;
77inline NsightAftermathTracker::~NsightAftermathTracker() = default; 78inline NsightAftermathTracker::~NsightAftermathTracker() = default;
78inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {} 79inline void NsightAftermathTracker::SaveShader(std::span<const u32>) const {}
79#endif 80#endif
80 81
81} // namespace Vulkan 82} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index f214510da..44afdc1cd 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <bitset> 6#include <bitset>
6#include <chrono> 7#include <chrono>
7#include <optional> 8#include <optional>
@@ -33,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
33}; 34};
34} // namespace Alternatives 35} // namespace Alternatives
35 36
37enum class NvidiaArchitecture {
38 AmpereOrNewer,
39 Turing,
40 VoltaOrOlder,
41};
42
36constexpr std::array REQUIRED_EXTENSIONS{ 43constexpr std::array REQUIRED_EXTENSIONS{
37 VK_KHR_MAINTENANCE1_EXTENSION_NAME, 44 VK_KHR_MAINTENANCE1_EXTENSION_NAME,
38 VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, 45 VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
@@ -43,11 +50,14 @@ constexpr std::array REQUIRED_EXTENSIONS{
43 VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, 50 VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
44 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, 51 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
45 VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, 52 VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
53 VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
54 VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
46 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, 55 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
47 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, 56 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
48 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 57 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
49 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, 58 VK_EXT_ROBUSTNESS_2_EXTENSION_NAME,
50 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, 59 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
60 VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME,
51#ifdef _WIN32 61#ifdef _WIN32
52 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, 62 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
53#endif 63#endif
@@ -112,6 +122,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
112 VK_FORMAT_R16G16_SFLOAT, 122 VK_FORMAT_R16G16_SFLOAT,
113 VK_FORMAT_R16G16_SINT, 123 VK_FORMAT_R16G16_SINT,
114 VK_FORMAT_R16_UNORM, 124 VK_FORMAT_R16_UNORM,
125 VK_FORMAT_R16_SNORM,
115 VK_FORMAT_R16_UINT, 126 VK_FORMAT_R16_UINT,
116 VK_FORMAT_R8G8B8A8_SRGB, 127 VK_FORMAT_R8G8B8A8_SRGB,
117 VK_FORMAT_R8G8_UNORM, 128 VK_FORMAT_R8G8_UNORM,
@@ -191,15 +202,47 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
191 return format_properties; 202 return format_properties;
192} 203}
193 204
205std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) {
206 const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
207 std::vector<std::string> supported_extensions;
208 supported_extensions.reserve(extensions.size());
209 for (const auto& extension : extensions) {
210 supported_extensions.emplace_back(extension.extensionName);
211 }
212 return supported_extensions;
213}
214
215NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
216 std::span<const std::string> exts) {
217 if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) {
218 VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{};
219 shading_rate_props.sType =
220 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
221 VkPhysicalDeviceProperties2KHR physical_properties{};
222 physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
223 physical_properties.pNext = &shading_rate_props;
224 physical.GetProperties2KHR(physical_properties);
225 if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
226 // Only Ampere and newer support this feature
227 return NvidiaArchitecture::AmpereOrNewer;
228 }
229 }
230 if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) {
231 return NvidiaArchitecture::Turing;
232 }
233 return NvidiaArchitecture::VoltaOrOlder;
234}
194} // Anonymous namespace 235} // Anonymous namespace
195 236
196Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, 237Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
197 const vk::InstanceDispatch& dld_) 238 const vk::InstanceDispatch& dld_)
198 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, 239 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
199 format_properties{GetFormatProperties(physical)} { 240 supported_extensions{GetSupportedExtensions(physical)},
241 format_properties(GetFormatProperties(physical)) {
200 CheckSuitability(surface != nullptr); 242 CheckSuitability(surface != nullptr);
201 SetupFamilies(surface); 243 SetupFamilies(surface);
202 SetupFeatures(); 244 SetupFeatures();
245 SetupProperties();
203 246
204 const auto queue_cis = GetDeviceQueueCreateInfos(); 247 const auto queue_cis = GetDeviceQueueCreateInfos();
205 const std::vector extensions = LoadExtensions(surface != nullptr); 248 const std::vector extensions = LoadExtensions(surface != nullptr);
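GetNvidiaArchitecture above uses the standard Vulkan query idiom: chain an extension-specific output struct into VkPhysicalDeviceProperties2 (or ...Features2) through pNext, call the query, and read back whatever the driver filled in. The same pattern recurs for every optional feature probed later in this file. A minimal sketch against the raw API, assuming a recent Vulkan header and the core 1.1 entry point rather than the KHR-suffixed dispatch used here:

    #include <vulkan/vulkan.h>

    // Query an extension-specific property block by chaining it into the pNext list.
    VkPhysicalDeviceFragmentShadingRatePropertiesKHR QueryShadingRateProps(VkPhysicalDevice gpu) {
        VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate{};
        shading_rate.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;

        VkPhysicalDeviceProperties2 props{};
        props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
        props.pNext = &shading_rate; // the driver walks this chain and fills known structs

        vkGetPhysicalDeviceProperties2(gpu, &props);
        return shading_rate;
    }
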
@@ -214,16 +257,16 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
214 .independentBlend = true, 257 .independentBlend = true,
215 .geometryShader = true, 258 .geometryShader = true,
216 .tessellationShader = true, 259 .tessellationShader = true,
217 .sampleRateShading = false, 260 .sampleRateShading = true,
218 .dualSrcBlend = false, 261 .dualSrcBlend = true,
219 .logicOp = false, 262 .logicOp = false,
220 .multiDrawIndirect = false, 263 .multiDrawIndirect = false,
221 .drawIndirectFirstInstance = false, 264 .drawIndirectFirstInstance = false,
222 .depthClamp = true, 265 .depthClamp = true,
223 .depthBiasClamp = true, 266 .depthBiasClamp = true,
224 .fillModeNonSolid = false, 267 .fillModeNonSolid = true,
225 .depthBounds = false, 268 .depthBounds = is_depth_bounds_supported,
226 .wideLines = false, 269 .wideLines = true,
227 .largePoints = true, 270 .largePoints = true,
228 .alphaToOne = false, 271 .alphaToOne = false,
229 .multiViewport = true, 272 .multiViewport = true,
@@ -245,11 +288,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
245 .shaderSampledImageArrayDynamicIndexing = false, 288 .shaderSampledImageArrayDynamicIndexing = false,
246 .shaderStorageBufferArrayDynamicIndexing = false, 289 .shaderStorageBufferArrayDynamicIndexing = false,
247 .shaderStorageImageArrayDynamicIndexing = false, 290 .shaderStorageImageArrayDynamicIndexing = false,
248 .shaderClipDistance = false, 291 .shaderClipDistance = true,
249 .shaderCullDistance = false, 292 .shaderCullDistance = true,
250 .shaderFloat64 = false, 293 .shaderFloat64 = is_shader_float64_supported,
251 .shaderInt64 = false, 294 .shaderInt64 = is_shader_int64_supported,
252 .shaderInt16 = false, 295 .shaderInt16 = is_shader_int16_supported,
253 .shaderResourceResidency = false, 296 .shaderResourceResidency = false,
254 .shaderResourceMinLod = false, 297 .shaderResourceMinLod = false,
255 .sparseBinding = false, 298 .sparseBinding = false,
@@ -278,7 +321,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
278 VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ 321 VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
279 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, 322 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
280 .pNext = nullptr, 323 .pNext = nullptr,
281 .storageBuffer16BitAccess = false, 324 .storageBuffer16BitAccess = true,
282 .uniformAndStorageBuffer16BitAccess = true, 325 .uniformAndStorageBuffer16BitAccess = true,
283 .storagePushConstant16 = false, 326 .storagePushConstant16 = false,
284 .storageInputOutput16 = false, 327 .storageInputOutput16 = false,
@@ -310,6 +353,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
310 }; 353 };
311 SetNext(next, host_query_reset); 354 SetNext(next, host_query_reset);
312 355
356 VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{
357 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR,
358 .pNext = nullptr,
359 .variablePointersStorageBuffer = VK_TRUE,
360 .variablePointers = VK_TRUE,
361 };
362 SetNext(next, variable_pointers);
363
364 VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{
365 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
366 .pNext = nullptr,
367 .shaderDemoteToHelperInvocation = true,
368 };
369 SetNext(next, demote);
370
313 VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; 371 VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
314 if (is_float16_supported) { 372 if (is_float16_supported) {
315 float16_int8 = { 373 float16_int8 = {
@@ -327,6 +385,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
327 LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); 385 LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
328 } 386 }
329 387
388 if (!nv_viewport_array2) {
389 LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks");
390 }
391
392 if (!nv_geometry_shader_passthrough) {
393 LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders");
394 }
395
330 VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; 396 VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
331 if (khr_uniform_buffer_standard_layout) { 397 if (khr_uniform_buffer_standard_layout) {
332 std430_layout = { 398 std430_layout = {
@@ -389,12 +455,83 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
389 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); 455 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
390 } 456 }
391 457
458 VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
459 if (ext_line_rasterization) {
460 line_raster = {
461 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT,
462 .pNext = nullptr,
463 .rectangularLines = VK_TRUE,
464 .bresenhamLines = VK_FALSE,
465 .smoothLines = VK_TRUE,
466 .stippledRectangularLines = VK_FALSE,
467 .stippledBresenhamLines = VK_FALSE,
468 .stippledSmoothLines = VK_FALSE,
469 };
470 SetNext(next, line_raster);
471 } else {
472 LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines");
473 }
474
475 if (!ext_conservative_rasterization) {
476 LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization");
477 }
478
479 VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
480 if (ext_provoking_vertex) {
481 provoking_vertex = {
482 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
483 .pNext = nullptr,
484 .provokingVertexLast = VK_TRUE,
485 .transformFeedbackPreservesProvokingVertex = VK_TRUE,
486 };
487 SetNext(next, provoking_vertex);
488 } else {
489 LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last");
490 }
491
492 VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic;
493 if (ext_vertex_input_dynamic_state) {
494 vertex_input_dynamic = {
495 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT,
496 .pNext = nullptr,
497 .vertexInputDynamicState = VK_TRUE,
498 };
499 SetNext(next, vertex_input_dynamic);
500 } else {
501 LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state");
502 }
503
504 VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64;
505 if (ext_shader_atomic_int64) {
506 atomic_int64 = {
507 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR,
508 .pNext = nullptr,
509 .shaderBufferInt64Atomics = VK_TRUE,
510 .shaderSharedInt64Atomics = VK_TRUE,
511 };
512 SetNext(next, atomic_int64);
513 }
514
515 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout;
516 if (khr_workgroup_memory_explicit_layout) {
517 workgroup_layout = {
518 .sType =
519 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR,
520 .pNext = nullptr,
521 .workgroupMemoryExplicitLayout = VK_TRUE,
522 .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE,
523 .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE,
524 .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE,
525 };
526 SetNext(next, workgroup_layout);
527 }
528
392 if (!ext_depth_range_unrestricted) { 529 if (!ext_depth_range_unrestricted) {
393 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 530 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
394 } 531 }
395 532
396 VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; 533 VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
397 if (nv_device_diagnostics_config) { 534 if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) {
398 nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>(); 535 nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
399 536
400 diagnostics_nv = { 537 diagnostics_nv = {
@@ -412,11 +549,33 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
412 CollectTelemetryParameters(); 549 CollectTelemetryParameters();
413 CollectToolingInfo(); 550 CollectToolingInfo();
414 551
552 if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
553 const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
554 switch (arch) {
555 case NvidiaArchitecture::AmpereOrNewer:
556 LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
557 is_float16_supported = false;
558 break;
559 case NvidiaArchitecture::Turing:
560 break;
561 case NvidiaArchitecture::VoltaOrOlder:
562 LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor");
563 khr_push_descriptor = false;
564 break;
565 }
566 }
415 if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { 567 if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
416 LOG_WARNING( 568 // Mask driver version variant
417 Render_Vulkan, 569 const u32 version = (properties.driverVersion << 3) >> 3;
418 "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); 570 if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
419 ext_extended_dynamic_state = false; 571 LOG_WARNING(Render_Vulkan,
572 "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state");
573 ext_extended_dynamic_state = false;
574 }
575 }
576 if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
577 LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
578 ext_vertex_input_dynamic_state = false;
420 } 579 }
421 if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { 580 if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
422 // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. 581 // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.
@@ -426,8 +585,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
426 585
427 graphics_queue = logical.GetQueue(graphics_family); 586 graphics_queue = logical.GetQueue(graphics_family);
428 present_queue = logical.GetQueue(present_family); 587 present_queue = logical.GetQueue(present_family);
429
430 use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
431} 588}
432 589
433Device::~Device() = default; 590Device::~Device() = default;
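The RADV workaround above compares a masked driverVersion against VK_MAKE_API_VERSION(0, 21, 2, 0); the (version << 3) >> 3 shift pair clears the top three variant bits so only major.minor.patch remain. A worked example of the arithmetic, assuming Mesa reports its release number in the usual major/minor/patch layout:

    #include <cstdint>
    #include <iostream>

    // VK_MAKE_API_VERSION packing: variant[31:29] | major[28:22] | minor[21:12] | patch[11:0]
    constexpr std::uint32_t MakeApiVersion(std::uint32_t variant, std::uint32_t major,
                                           std::uint32_t minor, std::uint32_t patch) {
        return (variant << 29) | (major << 22) | (minor << 12) | patch;
    }

    int main() {
        const std::uint32_t reported = MakeApiVersion(0, 21, 1, 5); // e.g. Mesa 21.1.5
        const std::uint32_t masked = (reported << 3) >> 3;          // drop the variant bits
        const std::uint32_t threshold = MakeApiVersion(0, 21, 2, 0);
        std::cout << std::boolalpha << (masked < threshold) << '\n'; // true: disable the extension
    }
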
@@ -471,7 +628,7 @@ void Device::ReportLoss() const {
471 std::this_thread::sleep_for(std::chrono::seconds{15}); 628 std::this_thread::sleep_for(std::chrono::seconds{15});
472} 629}
473 630
474void Device::SaveShader(const std::vector<u32>& spirv) const { 631void Device::SaveShader(std::span<const u32> spirv) const {
475 if (nsight_aftermath_tracker) { 632 if (nsight_aftermath_tracker) {
476 nsight_aftermath_tracker->SaveShader(spirv); 633 nsight_aftermath_tracker->SaveShader(spirv);
477 } 634 }
@@ -597,10 +754,20 @@ void Device::CheckSuitability(bool requires_swapchain) const {
597 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); 754 throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
598 } 755 }
599 } 756 }
757 VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{};
758 demote.sType =
759 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
760 demote.pNext = nullptr;
761
762 VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{};
763 variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR;
764 variable_pointers.pNext = &demote;
765
600 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; 766 VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
601 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; 767 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
768 robustness2.pNext = &variable_pointers;
602 769
603 VkPhysicalDeviceFeatures2 features2{}; 770 VkPhysicalDeviceFeatures2KHR features2{};
604 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; 771 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
605 features2.pNext = &robustness2; 772 features2.pNext = &robustness2;
606 773
@@ -610,7 +777,6 @@ void Device::CheckSuitability(bool requires_swapchain) const {
610 const std::array feature_report{ 777 const std::array feature_report{
611 std::make_pair(features.robustBufferAccess, "robustBufferAccess"), 778 std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
612 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), 779 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
613 std::make_pair(features.robustBufferAccess, "robustBufferAccess"),
614 std::make_pair(features.imageCubeArray, "imageCubeArray"), 780 std::make_pair(features.imageCubeArray, "imageCubeArray"),
615 std::make_pair(features.independentBlend, "independentBlend"), 781 std::make_pair(features.independentBlend, "independentBlend"),
616 std::make_pair(features.depthClamp, "depthClamp"), 782 std::make_pair(features.depthClamp, "depthClamp"),
@@ -618,13 +784,23 @@ void Device::CheckSuitability(bool requires_swapchain) const {
618 std::make_pair(features.largePoints, "largePoints"), 784 std::make_pair(features.largePoints, "largePoints"),
619 std::make_pair(features.multiViewport, "multiViewport"), 785 std::make_pair(features.multiViewport, "multiViewport"),
620 std::make_pair(features.depthBiasClamp, "depthBiasClamp"), 786 std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
787 std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"),
788 std::make_pair(features.wideLines, "wideLines"),
621 std::make_pair(features.geometryShader, "geometryShader"), 789 std::make_pair(features.geometryShader, "geometryShader"),
622 std::make_pair(features.tessellationShader, "tessellationShader"), 790 std::make_pair(features.tessellationShader, "tessellationShader"),
791 std::make_pair(features.sampleRateShading, "sampleRateShading"),
792 std::make_pair(features.dualSrcBlend, "dualSrcBlend"),
623 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), 793 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
624 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), 794 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
625 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 795 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
626 std::make_pair(features.shaderStorageImageWriteWithoutFormat, 796 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
627 "shaderStorageImageWriteWithoutFormat"), 797 "shaderStorageImageWriteWithoutFormat"),
798 std::make_pair(features.shaderClipDistance, "shaderClipDistance"),
799 std::make_pair(features.shaderCullDistance, "shaderCullDistance"),
800 std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"),
801 std::make_pair(variable_pointers.variablePointers, "variablePointers"),
802 std::make_pair(variable_pointers.variablePointersStorageBuffer,
803 "variablePointersStorageBuffer"),
628 std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), 804 std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"),
629 std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), 805 std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"),
630 std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), 806 std::make_pair(robustness2.nullDescriptor, "nullDescriptor"),
@@ -647,14 +823,19 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
647 } 823 }
648 824
649 bool has_khr_shader_float16_int8{}; 825 bool has_khr_shader_float16_int8{};
826 bool has_khr_workgroup_memory_explicit_layout{};
650 bool has_ext_subgroup_size_control{}; 827 bool has_ext_subgroup_size_control{};
651 bool has_ext_transform_feedback{}; 828 bool has_ext_transform_feedback{};
652 bool has_ext_custom_border_color{}; 829 bool has_ext_custom_border_color{};
653 bool has_ext_extended_dynamic_state{}; 830 bool has_ext_extended_dynamic_state{};
654 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { 831 bool has_ext_shader_atomic_int64{};
832 bool has_ext_provoking_vertex{};
833 bool has_ext_vertex_input_dynamic_state{};
834 bool has_ext_line_rasterization{};
835 for (const std::string& extension : supported_extensions) {
655 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, 836 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
656 bool push) { 837 bool push) {
657 if (extension.extensionName != std::string_view(name)) { 838 if (extension != name) {
658 return; 839 return;
659 } 840 }
660 if (push) { 841 if (push) {
@@ -665,8 +846,13 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
665 } 846 }
666 }; 847 };
667 test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); 848 test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
849 test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true);
850 test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME,
851 true);
668 test(khr_uniform_buffer_standard_layout, 852 test(khr_uniform_buffer_standard_layout,
669 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); 853 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
854 test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
855 test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
670 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 856 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
671 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 857 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
672 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 858 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
@@ -675,16 +861,25 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
675 true); 861 true);
676 test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); 862 test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
677 test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); 863 test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
864 test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME,
865 true);
678 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); 866 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
679 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); 867 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
680 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); 868 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
681 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); 869 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
682 if (Settings::values.renderer_debug) { 870 test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false);
871 test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME,
872 false);
873 test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
874 test(has_khr_workgroup_memory_explicit_layout,
875 VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
876 test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);
877 if (Settings::values.enable_nsight_aftermath) {
683 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, 878 test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
684 true); 879 true);
685 } 880 }
686 } 881 }
687 VkPhysicalDeviceFeatures2KHR features; 882 VkPhysicalDeviceFeatures2KHR features{};
688 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; 883 features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
689 884
690 VkPhysicalDeviceProperties2KHR physical_properties; 885 VkPhysicalDeviceProperties2KHR physical_properties;
@@ -722,10 +917,49 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
722 subgroup_properties.maxSubgroupSize >= GuestWarpSize) { 917 subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
723 extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); 918 extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
724 guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; 919 guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
920 ext_subgroup_size_control = true;
725 } 921 }
726 } else { 922 } else {
727 is_warp_potentially_bigger = true; 923 is_warp_potentially_bigger = true;
728 } 924 }
925 if (has_ext_provoking_vertex) {
926 VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex;
927 provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT;
928 provoking_vertex.pNext = nullptr;
929 features.pNext = &provoking_vertex;
930 physical.GetFeatures2KHR(features);
931
932 if (provoking_vertex.provokingVertexLast &&
933 provoking_vertex.transformFeedbackPreservesProvokingVertex) {
934 extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
935 ext_provoking_vertex = true;
936 }
937 }
938 if (has_ext_vertex_input_dynamic_state) {
939 VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input;
940 vertex_input.sType =
941 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT;
942 vertex_input.pNext = nullptr;
943 features.pNext = &vertex_input;
944 physical.GetFeatures2KHR(features);
945
946 if (vertex_input.vertexInputDynamicState) {
947 extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
948 ext_vertex_input_dynamic_state = true;
949 }
950 }
951 if (has_ext_shader_atomic_int64) {
952 VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
953 atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
954 atomic_int64.pNext = nullptr;
955 features.pNext = &atomic_int64;
956 physical.GetFeatures2KHR(features);
957
958 if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
959 extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
960 ext_shader_atomic_int64 = true;
961 }
962 }
729 if (has_ext_transform_feedback) { 963 if (has_ext_transform_feedback) {
730 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; 964 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
731 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; 965 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
@@ -760,17 +994,55 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
760 } 994 }
761 } 995 }
762 if (has_ext_extended_dynamic_state) { 996 if (has_ext_extended_dynamic_state) {
763 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; 997 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state;
764 dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; 998 extended_dynamic_state.sType =
765 dynamic_state.pNext = nullptr; 999 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT;
766 features.pNext = &dynamic_state; 1000 extended_dynamic_state.pNext = nullptr;
1001 features.pNext = &extended_dynamic_state;
767 physical.GetFeatures2KHR(features); 1002 physical.GetFeatures2KHR(features);
768 1003
769 if (dynamic_state.extendedDynamicState) { 1004 if (extended_dynamic_state.extendedDynamicState) {
770 extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); 1005 extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
771 ext_extended_dynamic_state = true; 1006 ext_extended_dynamic_state = true;
772 } 1007 }
773 } 1008 }
1009 if (has_ext_line_rasterization) {
1010 VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster;
1011 line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT;
1012 line_raster.pNext = nullptr;
1013 features.pNext = &line_raster;
1014 physical.GetFeatures2KHR(features);
1015 if (line_raster.rectangularLines && line_raster.smoothLines) {
1016 extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME);
1017 ext_line_rasterization = true;
1018 }
1019 }
1020 if (has_khr_workgroup_memory_explicit_layout) {
1021 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout;
1022 layout.sType =
1023 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
1024 layout.pNext = nullptr;
1025 features.pNext = &layout;
1026 physical.GetFeatures2KHR(features);
1027
1028 if (layout.workgroupMemoryExplicitLayout &&
1029 layout.workgroupMemoryExplicitLayout8BitAccess &&
1030 layout.workgroupMemoryExplicitLayout16BitAccess &&
1031 layout.workgroupMemoryExplicitLayoutScalarBlockLayout) {
1032 extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
1033 khr_workgroup_memory_explicit_layout = true;
1034 }
1035 }
1036 if (khr_push_descriptor) {
1037 VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
1038 push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
1039 push_descriptor.pNext = nullptr;
1040
1041 physical_properties.pNext = &push_descriptor;
1042 physical.GetProperties2KHR(physical_properties);
1043
1044 max_push_descriptors = push_descriptor.maxPushDescriptors;
1045 }
774 return extensions; 1046 return extensions;
775} 1047}
776 1048
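LoadExtensions now detects most optional extensions in two passes: the name scan over supported_extensions only raises a has_* flag, and the extension is pushed onto the enable list once GetFeatures2KHR confirms the specific feature bits the renderer needs. A standalone sketch of the first pass, mirroring the std::ranges::find usage above:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    // supported_extensions is what GetSupportedExtensions() returns for the device.
    bool IsSupported(const std::vector<std::string>& supported_extensions, const char* name) {
        return std::ranges::find(supported_extensions, name) != supported_extensions.end();
    }

    int main() {
        const std::vector<std::string> supported{"VK_KHR_push_descriptor", "VK_EXT_robustness2"};
        std::cout << IsSupported(supported, "VK_KHR_push_descriptor") << ' '
                  << IsSupported(supported, "VK_EXT_provoking_vertex") << '\n'; // prints "1 0"
    }
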
@@ -806,11 +1078,25 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
806} 1078}
807 1079
808void Device::SetupFeatures() { 1080void Device::SetupFeatures() {
809 const auto supported_features{physical.GetFeatures()}; 1081 const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
810 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; 1082 is_depth_bounds_supported = features.depthBounds;
811 is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; 1083 is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat;
1084 is_shader_float64_supported = features.shaderFloat64;
1085 is_shader_int64_supported = features.shaderInt64;
1086 is_shader_int16_supported = features.shaderInt16;
1087 is_shader_storage_image_multisample = features.shaderStorageImageMultisample;
812 is_blit_depth_stencil_supported = TestDepthStencilBlits(); 1088 is_blit_depth_stencil_supported = TestDepthStencilBlits();
813 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); 1089 is_optimal_astc_supported = IsOptimalAstcSupported(features);
1090}
1091
1092void Device::SetupProperties() {
1093 float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
1094
1095 VkPhysicalDeviceProperties2KHR properties2{};
1096 properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
1097 properties2.pNext = &float_controls;
1098
1099 physical.GetProperties2KHR(properties2);
814} 1100}
815 1101
816void Device::CollectTelemetryParameters() { 1102void Device::CollectTelemetryParameters() {
@@ -832,12 +1118,6 @@ void Device::CollectTelemetryParameters() {
832 1118
833 driver_id = driver.driverID; 1119 driver_id = driver.driverID;
834 vendor_name = driver.driverName; 1120 vendor_name = driver.driverName;
835
836 const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
837 reported_extensions.reserve(std::size(extensions));
838 for (const auto& extension : extensions) {
839 reported_extensions.emplace_back(extension.extensionName);
840 }
841} 1121}
842 1122
843void Device::CollectPhysicalMemoryInfo() { 1123void Device::CollectPhysicalMemoryInfo() {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 96c0f8c60..df394e384 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <span>
7#include <string> 8#include <string>
8#include <string_view> 9#include <string_view>
9#include <unordered_map> 10#include <unordered_map>
@@ -43,7 +44,7 @@ public:
43 void ReportLoss() const; 44 void ReportLoss() const;
44 45
45 /// Reports a shader to Nsight Aftermath. 46 /// Reports a shader to Nsight Aftermath.
46 void SaveShader(const std::vector<u32>& spirv) const; 47 void SaveShader(std::span<const u32> spirv) const;
47 48
48 /// Returns the name of the VkDriverId reported from Vulkan. 49 /// Returns the name of the VkDriverId reported from Vulkan.
49 std::string GetDriverName() const; 50 std::string GetDriverName() const;
@@ -128,6 +129,11 @@ public:
128 return properties.limits.maxComputeSharedMemorySize; 129 return properties.limits.maxComputeSharedMemorySize;
129 } 130 }
130 131
132 /// Returns float control properties of the device.
133 const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const {
134 return float_controls;
135 }
136
131 /// Returns true if ASTC is natively supported. 137 /// Returns true if ASTC is natively supported.
132 bool IsOptimalAstcSupported() const { 138 bool IsOptimalAstcSupported() const {
133 return is_optimal_astc_supported; 139 return is_optimal_astc_supported;
@@ -148,11 +154,31 @@ public:
148 return guest_warp_stages & stage; 154 return guest_warp_stages & stage;
149 } 155 }
150 156
157 /// Returns the maximum number of push descriptors.
158 u32 MaxPushDescriptors() const {
159 return max_push_descriptors;
160 }
161
151 /// Returns true if formatless image load is supported. 162 /// Returns true if formatless image load is supported.
152 bool IsFormatlessImageLoadSupported() const { 163 bool IsFormatlessImageLoadSupported() const {
153 return is_formatless_image_load_supported; 164 return is_formatless_image_load_supported;
154 } 165 }
155 166
167 /// Returns true if shader int64 is supported.
168 bool IsShaderInt64Supported() const {
169 return is_shader_int64_supported;
170 }
171
172 /// Returns true if shader int16 is supported.
173 bool IsShaderInt16Supported() const {
174 return is_shader_int16_supported;
175 }
176
177 // Returns true if depth bounds is supported.
178 bool IsDepthBoundsSupported() const {
179 return is_depth_bounds_supported;
180 }
181
156 /// Returns true when blitting from and to depth stencil images is supported. 182 /// Returns true when blitting from and to depth stencil images is supported.
157 bool IsBlitDepthStencilSupported() const { 183 bool IsBlitDepthStencilSupported() const {
158 return is_blit_depth_stencil_supported; 184 return is_blit_depth_stencil_supported;
@@ -163,11 +189,36 @@ public:
163 return nv_viewport_swizzle; 189 return nv_viewport_swizzle;
164 } 190 }
165 191
166 /// Returns true if the device supports VK_EXT_scalar_block_layout. 192 /// Returns true if the device supports VK_NV_viewport_array2.
193 bool IsNvViewportArray2Supported() const {
194 return nv_viewport_array2;
195 }
196
197 /// Returns true if the device supports VK_NV_geometry_shader_passthrough.
198 bool IsNvGeometryShaderPassthroughSupported() const {
199 return nv_geometry_shader_passthrough;
200 }
201
202 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
167 bool IsKhrUniformBufferStandardLayoutSupported() const { 203 bool IsKhrUniformBufferStandardLayoutSupported() const {
168 return khr_uniform_buffer_standard_layout; 204 return khr_uniform_buffer_standard_layout;
169 } 205 }
170 206
207 /// Returns true if the device supports VK_KHR_spirv_1_4.
208 bool IsKhrSpirv1_4Supported() const {
209 return khr_spirv_1_4;
210 }
211
212 /// Returns true if the device supports VK_KHR_push_descriptor.
213 bool IsKhrPushDescriptorSupported() const {
214 return khr_push_descriptor;
215 }
216
217 /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
218 bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
219 return khr_workgroup_memory_explicit_layout;
220 }
221
171 /// Returns true if the device supports VK_EXT_index_type_uint8. 222 /// Returns true if the device supports VK_EXT_index_type_uint8.
172 bool IsExtIndexTypeUint8Supported() const { 223 bool IsExtIndexTypeUint8Supported() const {
173 return ext_index_type_uint8; 224 return ext_index_type_uint8;
@@ -188,6 +239,11 @@ public:
188 return ext_shader_viewport_index_layer; 239 return ext_shader_viewport_index_layer;
189 } 240 }
190 241
242 /// Returns true if the device supports VK_EXT_subgroup_size_control.
243 bool IsExtSubgroupSizeControlSupported() const {
244 return ext_subgroup_size_control;
245 }
246
191 /// Returns true if the device supports VK_EXT_transform_feedback. 247 /// Returns true if the device supports VK_EXT_transform_feedback.
192 bool IsExtTransformFeedbackSupported() const { 248 bool IsExtTransformFeedbackSupported() const {
193 return ext_transform_feedback; 249 return ext_transform_feedback;
@@ -203,11 +259,36 @@ public:
203 return ext_extended_dynamic_state; 259 return ext_extended_dynamic_state;
204 } 260 }
205 261
262 /// Returns true if the device supports VK_EXT_line_rasterization.
263 bool IsExtLineRasterizationSupported() const {
264 return ext_line_rasterization;
265 }
266
267 /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state.
268 bool IsExtVertexInputDynamicStateSupported() const {
269 return ext_vertex_input_dynamic_state;
270 }
271
206 /// Returns true if the device supports VK_EXT_shader_stencil_export. 272 /// Returns true if the device supports VK_EXT_shader_stencil_export.
207 bool IsExtShaderStencilExportSupported() const { 273 bool IsExtShaderStencilExportSupported() const {
208 return ext_shader_stencil_export; 274 return ext_shader_stencil_export;
209 } 275 }
210 276
277 /// Returns true if the device supports VK_EXT_conservative_rasterization.
278 bool IsExtConservativeRasterizationSupported() const {
279 return ext_conservative_rasterization;
280 }
281
282 /// Returns true if the device supports VK_EXT_provoking_vertex.
283 bool IsExtProvokingVertexSupported() const {
284 return ext_provoking_vertex;
285 }
286
287 /// Returns true if the device supports VK_KHR_shader_atomic_int64.
288 bool IsExtShaderAtomicInt64Supported() const {
289 return ext_shader_atomic_int64;
290 }
291
211 /// Returns true when a known debugging tool is attached. 292 /// Returns true when a known debugging tool is attached.
212 bool HasDebuggingToolAttached() const { 293 bool HasDebuggingToolAttached() const {
213 return has_renderdoc || has_nsight_graphics; 294 return has_renderdoc || has_nsight_graphics;
@@ -220,12 +301,7 @@ public:
220 301
221 /// Returns the list of available extensions. 302 /// Returns the list of available extensions.
222 const std::vector<std::string>& GetAvailableExtensions() const { 303 const std::vector<std::string>& GetAvailableExtensions() const {
223 return reported_extensions; 304 return supported_extensions;
224 }
225
226 /// Returns true if the setting for async shader compilation is enabled.
227 bool UseAsynchronousShaders() const {
228 return use_asynchronous_shaders;
229 } 305 }
230 306
231 u64 GetDeviceLocalMemory() const { 307 u64 GetDeviceLocalMemory() const {
@@ -245,6 +321,9 @@ private:
245 /// Sets up device features. 321 /// Sets up device features.
246 void SetupFeatures(); 322 void SetupFeatures();
247 323
324 /// Sets up device properties.
325 void SetupProperties();
326
248 /// Collects telemetry information from the device. 327 /// Collects telemetry information from the device.
249 void CollectTelemetryParameters(); 328 void CollectTelemetryParameters();
250 329
@@ -267,46 +346,60 @@ private:
267 bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, 346 bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
268 FormatType format_type) const; 347 FormatType format_type) const;
269 348
270 VkInstance instance; ///< Vulkan instance. 349 VkInstance instance; ///< Vulkan instance.
271 vk::DeviceDispatch dld; ///< Device function pointers. 350 vk::DeviceDispatch dld; ///< Device function pointers.
272 vk::PhysicalDevice physical; ///< Physical device. 351 vk::PhysicalDevice physical; ///< Physical device.
273 VkPhysicalDeviceProperties properties; ///< Device properties. 352 VkPhysicalDeviceProperties properties; ///< Device properties.
274 vk::Device logical; ///< Logical device. 353 VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties.
275 vk::Queue graphics_queue; ///< Main graphics queue. 354 vk::Device logical; ///< Logical device.
276 vk::Queue present_queue; ///< Main present queue. 355 vk::Queue graphics_queue; ///< Main graphics queue.
277 u32 instance_version{}; ///< Vulkan onstance version. 356 vk::Queue present_queue; ///< Main present queue.
357 u32 instance_version{}; ///< Vulkan onstance version.
278 u32 graphics_family{}; ///< Main graphics queue family index. 358 u32 graphics_family{}; ///< Main graphics queue family index.
279 u32 present_family{}; ///< Main present queue family index. 359 u32 present_family{}; ///< Main present queue family index.
280 VkDriverIdKHR driver_id{}; ///< Driver ID. 360 VkDriverIdKHR driver_id{}; ///< Driver ID.
281 VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. 361 VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
282 u64 device_access_memory{}; ///< Total size of device local memory in bytes. 362 u64 device_access_memory{}; ///< Total size of device local memory in bytes.
363 u32 max_push_descriptors{}; ///< Maximum number of push descriptors
283 bool is_optimal_astc_supported{}; ///< Support for native ASTC. 364 bool is_optimal_astc_supported{}; ///< Support for native ASTC.
284 bool is_float16_supported{}; ///< Support for float16 arithmetics. 365 bool is_float16_supported{}; ///< Support for float16 arithmetics.
285 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 366 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
286 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. 367 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
368 bool is_depth_bounds_supported{}; ///< Support for depth bounds.
369 bool is_shader_float64_supported{}; ///< Support for float64.
370 bool is_shader_int64_supported{}; ///< Support for int64.
371 bool is_shader_int16_supported{}; ///< Support for int16.
287 bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. 372 bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
288 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. 373 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
289 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. 374 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
290 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 375 bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
291 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 376 bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
292 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. 377 bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
293 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 378 bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
294 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 379 bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
295 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. 380 bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor.
296 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 381 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
297 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 382 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
298 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 383 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
299 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. 384 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
300 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 385 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
301 bool has_renderdoc{}; ///< Has RenderDoc attached 386 bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
302 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached 387 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
303 388 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
304 // Asynchronous Graphics Pipeline setting 389 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
305 bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline 390 bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization.
391 bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state.
392 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
393 bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64.
394 bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization.
395 bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex.
396 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
397 bool has_renderdoc{}; ///< Has RenderDoc attached
398 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
306 399
307 // Telemetry parameters 400 // Telemetry parameters
308 std::string vendor_name; ///< Device's driver name. 401 std::string vendor_name; ///< Device's driver name.
309 std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. 402 std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions.
310 403
311 /// Format properties dictionary. 404 /// Format properties dictionary.
312 std::unordered_map<VkFormat, VkFormatProperties> format_properties; 405 std::unordered_map<VkFormat, VkFormatProperties> format_properties;
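The new booleans above record which optional Vulkan extensions the host device exposes (push descriptors, dynamic vertex input, line rasterization, and so on); the renderer checks them before taking an extension-dependent path. A minimal, self-contained sketch of that gating pattern follows, using a stand-in struct rather than yuzu's actual Device class, so every name in it is illustrative only:

    #include <cstdio>

    // Stand-in for a few of the capability flags added above.
    struct DeviceFeatures {
        bool khr_push_descriptor = false;
        bool ext_vertex_input_dynamic_state = false;
        bool ext_line_rasterization = false;
    };

    void ReportOptionalPaths(const DeviceFeatures& f) {
        // Push descriptors allow descriptor updates to be recorded directly
        // into the command buffer instead of allocating descriptor sets.
        std::printf("push descriptors: %s\n", f.khr_push_descriptor ? "yes" : "no");
        // Dynamic vertex input avoids baking attribute layouts into pipelines,
        // reducing the number of pipeline permutations that must be built.
        std::printf("dynamic vertex input: %s\n",
                    f.ext_vertex_input_dynamic_state ? "yes" : "no");
        std::printf("line rasterization control: %s\n",
                    f.ext_line_rasterization ? "yes" : "no");
    }

    int main() {
        ReportOptionalPaths(DeviceFeatures{true, true, false});
        return 0;
    }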
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 2aa0ffbe6..bbf0fccae 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
103 X(vkCmdFillBuffer); 103 X(vkCmdFillBuffer);
104 X(vkCmdPipelineBarrier); 104 X(vkCmdPipelineBarrier);
105 X(vkCmdPushConstants); 105 X(vkCmdPushConstants);
106 X(vkCmdPushDescriptorSetWithTemplateKHR);
106 X(vkCmdSetBlendConstants); 107 X(vkCmdSetBlendConstants);
107 X(vkCmdSetDepthBias); 108 X(vkCmdSetDepthBias);
108 X(vkCmdSetDepthBounds); 109 X(vkCmdSetDepthBounds);
@@ -120,9 +121,11 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
120 X(vkCmdSetDepthTestEnableEXT); 121 X(vkCmdSetDepthTestEnableEXT);
121 X(vkCmdSetDepthWriteEnableEXT); 122 X(vkCmdSetDepthWriteEnableEXT);
122 X(vkCmdSetFrontFaceEXT); 123 X(vkCmdSetFrontFaceEXT);
124 X(vkCmdSetLineWidth);
123 X(vkCmdSetPrimitiveTopologyEXT); 125 X(vkCmdSetPrimitiveTopologyEXT);
124 X(vkCmdSetStencilOpEXT); 126 X(vkCmdSetStencilOpEXT);
125 X(vkCmdSetStencilTestEnableEXT); 127 X(vkCmdSetStencilTestEnableEXT);
128 X(vkCmdSetVertexInputEXT);
126 X(vkCmdResolveImage); 129 X(vkCmdResolveImage);
127 X(vkCreateBuffer); 130 X(vkCreateBuffer);
128 X(vkCreateBufferView); 131 X(vkCreateBufferView);
@@ -311,8 +314,6 @@ const char* ToString(VkResult result) noexcept {
311 return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; 314 return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
312 case VkResult::VK_ERROR_UNKNOWN: 315 case VkResult::VK_ERROR_UNKNOWN:
313 return "VK_ERROR_UNKNOWN"; 316 return "VK_ERROR_UNKNOWN";
314 case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR:
315 return "VK_ERROR_INCOMPATIBLE_VERSION_KHR";
316 case VkResult::VK_THREAD_IDLE_KHR: 317 case VkResult::VK_THREAD_IDLE_KHR:
317 return "VK_THREAD_IDLE_KHR"; 318 return "VK_THREAD_IDLE_KHR";
318 case VkResult::VK_THREAD_DONE_KHR: 319 case VkResult::VK_THREAD_DONE_KHR:
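Each X(...) entry added in this file registers one more entry point with the device loader. The macro body is outside these hunks, so the following is a hedged reconstruction of the usual pattern rather than the file's exact definition:

    // Illustrative reconstruction only: each X(name) line resolves the named
    // entry point through vkGetDeviceProcAddr and stores it in the matching
    // DeviceDispatch member.
    #define X(name) \
        dld.name = reinterpret_cast<PFN_##name>(dld.vkGetDeviceProcAddr(device, #name))

    // X(vkCmdSetLineWidth) would then expand to roughly:
    //   dld.vkCmdSetLineWidth = reinterpret_cast<PFN_vkCmdSetLineWidth>(
    //       dld.vkGetDeviceProcAddr(device, "vkCmdSetLineWidth"));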
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 3e36d356a..d76bb4324 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch {
193 PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; 193 PFN_vkBeginCommandBuffer vkBeginCommandBuffer{};
194 PFN_vkBindBufferMemory vkBindBufferMemory{}; 194 PFN_vkBindBufferMemory vkBindBufferMemory{};
195 PFN_vkBindImageMemory vkBindImageMemory{}; 195 PFN_vkBindImageMemory vkBindImageMemory{};
196 PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
196 PFN_vkCmdBeginQuery vkCmdBeginQuery{}; 197 PFN_vkCmdBeginQuery vkCmdBeginQuery{};
197 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; 198 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{};
198 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; 199 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{};
199 PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
200 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; 200 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{};
201 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; 201 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{};
202 PFN_vkCmdBindPipeline vkCmdBindPipeline{}; 202 PFN_vkCmdBindPipeline vkCmdBindPipeline{};
203 PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; 203 PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{};
204 PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; 204 PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{};
205 PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
205 PFN_vkCmdBlitImage vkCmdBlitImage{}; 206 PFN_vkCmdBlitImage vkCmdBlitImage{};
206 PFN_vkCmdClearAttachments vkCmdClearAttachments{}; 207 PFN_vkCmdClearAttachments vkCmdClearAttachments{};
207 PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; 208 PFN_vkCmdCopyBuffer vkCmdCopyBuffer{};
@@ -211,34 +212,36 @@ struct DeviceDispatch : InstanceDispatch {
211 PFN_vkCmdDispatch vkCmdDispatch{}; 212 PFN_vkCmdDispatch vkCmdDispatch{};
212 PFN_vkCmdDraw vkCmdDraw{}; 213 PFN_vkCmdDraw vkCmdDraw{};
213 PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; 214 PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
215 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
214 PFN_vkCmdEndQuery vkCmdEndQuery{}; 216 PFN_vkCmdEndQuery vkCmdEndQuery{};
215 PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; 217 PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
216 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; 218 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
217 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
218 PFN_vkCmdFillBuffer vkCmdFillBuffer{}; 219 PFN_vkCmdFillBuffer vkCmdFillBuffer{};
219 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; 220 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{};
220 PFN_vkCmdPushConstants vkCmdPushConstants{}; 221 PFN_vkCmdPushConstants vkCmdPushConstants{};
222 PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{};
223 PFN_vkCmdResolveImage vkCmdResolveImage{};
221 PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; 224 PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{};
225 PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
222 PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; 226 PFN_vkCmdSetDepthBias vkCmdSetDepthBias{};
223 PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; 227 PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{};
224 PFN_vkCmdSetEvent vkCmdSetEvent{};
225 PFN_vkCmdSetScissor vkCmdSetScissor{};
226 PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
227 PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
228 PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
229 PFN_vkCmdSetViewport vkCmdSetViewport{};
230 PFN_vkCmdWaitEvents vkCmdWaitEvents{};
231 PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
232 PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
233 PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; 228 PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{};
234 PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; 229 PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{};
235 PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; 230 PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{};
236 PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; 231 PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
232 PFN_vkCmdSetEvent vkCmdSetEvent{};
237 PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; 233 PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{};
234 PFN_vkCmdSetLineWidth vkCmdSetLineWidth{};
238 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; 235 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{};
236 PFN_vkCmdSetScissor vkCmdSetScissor{};
237 PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
239 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; 238 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{};
239 PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
240 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; 240 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{};
241 PFN_vkCmdResolveImage vkCmdResolveImage{}; 241 PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
242 PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{};
243 PFN_vkCmdSetViewport vkCmdSetViewport{};
244 PFN_vkCmdWaitEvents vkCmdWaitEvents{};
242 PFN_vkCreateBuffer vkCreateBuffer{}; 245 PFN_vkCreateBuffer vkCreateBuffer{};
243 PFN_vkCreateBufferView vkCreateBufferView{}; 246 PFN_vkCreateBufferView vkCreateBufferView{};
244 PFN_vkCreateCommandPool vkCreateCommandPool{}; 247 PFN_vkCreateCommandPool vkCreateCommandPool{};
@@ -989,6 +992,12 @@ public:
989 dynamic_offsets.size(), dynamic_offsets.data()); 992 dynamic_offsets.size(), dynamic_offsets.data());
990 } 993 }
991 994
995 void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template,
996 VkPipelineLayout layout, u32 set,
997 const void* data) const noexcept {
998 dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data);
999 }
1000
992 void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { 1001 void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept {
993 dld->vkCmdBindPipeline(handle, bind_point, pipeline); 1002 dld->vkCmdBindPipeline(handle, bind_point, pipeline);
994 } 1003 }
@@ -1190,6 +1199,10 @@ public:
1190 dld->vkCmdSetFrontFaceEXT(handle, front_face); 1199 dld->vkCmdSetFrontFaceEXT(handle, front_face);
1191 } 1200 }
1192 1201
1202 void SetLineWidth(float line_width) const noexcept {
1203 dld->vkCmdSetLineWidth(handle, line_width);
1204 }
1205
1193 void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { 1206 void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept {
1194 dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); 1207 dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology);
1195 } 1208 }
@@ -1203,6 +1216,13 @@ public:
1203 dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); 1216 dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE);
1204 } 1217 }
1205 1218
1219 void SetVertexInputEXT(
1220 vk::Span<VkVertexInputBindingDescription2EXT> bindings,
1221 vk::Span<VkVertexInputAttributeDescription2EXT> attributes) const noexcept {
1222 dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(),
1223 attributes.data());
1224 }
1225
1206 void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, 1226 void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
1207 const VkDeviceSize* offsets, 1227 const VkDeviceSize* offsets,
1208 const VkDeviceSize* sizes) const noexcept { 1228 const VkDeviceSize* sizes) const noexcept {