-rw-r--r--  CMakeModules/GenerateSCMRev.cmake | 56
m---------  externals/Vulkan-Headers | 0
m---------  externals/sirit | 0
-rw-r--r--  src/common/CMakeLists.txt | 56
-rw-r--r--  src/video_core/CMakeLists.txt | 64
-rw-r--r--  src/video_core/engines/const_buffer_engine_interface.h | 103
-rw-r--r--  src/video_core/engines/kepler_compute.cpp | 44
-rw-r--r--  src/video_core/engines/kepler_compute.h | 20
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 38
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 20
-rw-r--r--  src/video_core/guest_driver.cpp | 37
-rw-r--r--  src/video_core/guest_driver.h | 46
-rw-r--r--  src/video_core/rasterizer_interface.h | 16
-rw-r--r--  src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 2124
-rw-r--r--  src/video_core/renderer_opengl/gl_arb_decompiler.h | 29
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 314
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 33
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 564
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 102
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2986
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.h | 69
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 482
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.h | 176
-rw-r--r--  src/video_core/renderer_vulkan/blit_image.cpp | 1
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 136
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.h | 47
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 484
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 103
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 375
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 91
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 361
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 47
-rw-r--r--  src/video_core/shader/ast.cpp | 752
-rw-r--r--  src/video_core/shader/ast.h | 398
-rw-r--r--  src/video_core/shader/async_shaders.cpp | 234
-rw-r--r--  src/video_core/shader/async_shaders.h | 138
-rw-r--r--  src/video_core/shader/compiler_settings.cpp | 26
-rw-r--r--  src/video_core/shader/compiler_settings.h | 26
-rw-r--r--  src/video_core/shader/control_flow.cpp | 751
-rw-r--r--  src/video_core/shader/control_flow.h | 117
-rw-r--r--  src/video_core/shader/decode.cpp | 368
-rw-r--r--  src/video_core/shader/decode/arithmetic.cpp | 166
-rw-r--r--  src/video_core/shader/decode/arithmetic_half.cpp | 101
-rw-r--r--  src/video_core/shader/decode/arithmetic_half_immediate.cpp | 54
-rw-r--r--  src/video_core/shader/decode/arithmetic_immediate.cpp | 53
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer.cpp | 375
-rw-r--r--  src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 99
-rw-r--r--  src/video_core/shader/decode/bfe.cpp | 77
-rw-r--r--  src/video_core/shader/decode/bfi.cpp | 45
-rw-r--r--  src/video_core/shader/decode/conversion.cpp | 321
-rw-r--r--  src/video_core/shader/decode/ffma.cpp | 62
-rw-r--r--  src/video_core/shader/decode/float_set.cpp | 58
-rw-r--r--  src/video_core/shader/decode/float_set_predicate.cpp | 57
-rw-r--r--  src/video_core/shader/decode/half_set.cpp | 115
-rw-r--r--  src/video_core/shader/decode/half_set_predicate.cpp | 80
-rw-r--r--  src/video_core/shader/decode/hfma2.cpp | 73
-rw-r--r--  src/video_core/shader/decode/image.cpp | 536
-rw-r--r--  src/video_core/shader/decode/integer_set.cpp | 49
-rw-r--r--  src/video_core/shader/decode/integer_set_predicate.cpp | 53
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 493
-rw-r--r--  src/video_core/shader/decode/other.cpp | 322
-rw-r--r--  src/video_core/shader/decode/predicate_set_predicate.cpp | 68
-rw-r--r--  src/video_core/shader/decode/predicate_set_register.cpp | 46
-rw-r--r--  src/video_core/shader/decode/register_set_predicate.cpp | 86
-rw-r--r--  src/video_core/shader/decode/shift.cpp | 153
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 935
-rw-r--r--  src/video_core/shader/decode/video.cpp | 169
-rw-r--r--  src/video_core/shader/decode/warp.cpp | 117
-rw-r--r--  src/video_core/shader/decode/xmad.cpp | 156
-rw-r--r--  src/video_core/shader/expr.cpp | 93
-rw-r--r--  src/video_core/shader/expr.h | 156
-rw-r--r--  src/video_core/shader/memory_util.cpp | 76
-rw-r--r--  src/video_core/shader/memory_util.h | 43
-rw-r--r--  src/video_core/shader/node.h | 701
-rw-r--r--  src/video_core/shader/node_helper.cpp | 115
-rw-r--r--  src/video_core/shader/node_helper.h | 71
-rw-r--r--  src/video_core/shader/registry.cpp | 181
-rw-r--r--  src/video_core/shader/registry.h | 172
-rw-r--r--  src/video_core/shader/shader_ir.cpp | 464
-rw-r--r--  src/video_core/shader/shader_ir.h | 479
-rw-r--r--  src/video_core/shader/track.cpp | 236
-rw-r--r--  src/video_core/shader/transform_feedback.cpp | 115
-rw-r--r--  src/video_core/shader/transform_feedback.h | 23
83 files changed, 55 insertions(+), 19623 deletions(-)
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 311ba1c2e..77358768e 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -51,61 +51,7 @@ endif()
 # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
 set(VIDEO_CORE "${SRC_DIR}/src/video_core")
 set(HASH_FILES
-    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
-    "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
-    "${VIDEO_CORE}/shader/decode/bfe.cpp"
-    "${VIDEO_CORE}/shader/decode/bfi.cpp"
-    "${VIDEO_CORE}/shader/decode/conversion.cpp"
-    "${VIDEO_CORE}/shader/decode/ffma.cpp"
-    "${VIDEO_CORE}/shader/decode/float_set.cpp"
-    "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/half_set.cpp"
-    "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/hfma2.cpp"
-    "${VIDEO_CORE}/shader/decode/image.cpp"
-    "${VIDEO_CORE}/shader/decode/integer_set.cpp"
-    "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/memory.cpp"
-    "${VIDEO_CORE}/shader/decode/texture.cpp"
-    "${VIDEO_CORE}/shader/decode/other.cpp"
-    "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
-    "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/shift.cpp"
-    "${VIDEO_CORE}/shader/decode/video.cpp"
-    "${VIDEO_CORE}/shader/decode/warp.cpp"
-    "${VIDEO_CORE}/shader/decode/xmad.cpp"
-    "${VIDEO_CORE}/shader/ast.cpp"
-    "${VIDEO_CORE}/shader/ast.h"
-    "${VIDEO_CORE}/shader/compiler_settings.cpp"
-    "${VIDEO_CORE}/shader/compiler_settings.h"
-    "${VIDEO_CORE}/shader/control_flow.cpp"
-    "${VIDEO_CORE}/shader/control_flow.h"
-    "${VIDEO_CORE}/shader/decode.cpp"
-    "${VIDEO_CORE}/shader/expr.cpp"
-    "${VIDEO_CORE}/shader/expr.h"
-    "${VIDEO_CORE}/shader/node.h"
-    "${VIDEO_CORE}/shader/node_helper.cpp"
-    "${VIDEO_CORE}/shader/node_helper.h"
-    "${VIDEO_CORE}/shader/registry.cpp"
-    "${VIDEO_CORE}/shader/registry.h"
-    "${VIDEO_CORE}/shader/shader_ir.cpp"
-    "${VIDEO_CORE}/shader/shader_ir.h"
-    "${VIDEO_CORE}/shader/track.cpp"
-    "${VIDEO_CORE}/shader/transform_feedback.cpp"
-    "${VIDEO_CORE}/shader/transform_feedback.h"
+    # ...
 )
 set(COMBINED "")
 foreach (F IN LISTS HASH_FILES)
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
-Subproject 8188e3fbbc105591064093440f88081fb957d4f
+Subproject 07c4a37bcf41ea50aef6e98236abdfe8089fb4c
diff --git a/externals/sirit b/externals/sirit
-Subproject 200310e8faa756b9869dd6dfc902c255246ac74
+Subproject a39596358a3a5488c06554c0c15184a6af71e43
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e03fffd8d..c92266a17 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,61 +32,7 @@ add_custom_command(OUTPUT scm_rev.cpp
     DEPENDS
         # WARNING! It was too much work to try and make a common location for this list,
         # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
-        "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
-        "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
-        "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
-        "${VIDEO_CORE}/shader/decode/bfe.cpp"
-        "${VIDEO_CORE}/shader/decode/bfi.cpp"
-        "${VIDEO_CORE}/shader/decode/conversion.cpp"
-        "${VIDEO_CORE}/shader/decode/ffma.cpp"
-        "${VIDEO_CORE}/shader/decode/float_set.cpp"
-        "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/half_set.cpp"
-        "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/hfma2.cpp"
-        "${VIDEO_CORE}/shader/decode/image.cpp"
-        "${VIDEO_CORE}/shader/decode/integer_set.cpp"
-        "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/memory.cpp"
-        "${VIDEO_CORE}/shader/decode/texture.cpp"
-        "${VIDEO_CORE}/shader/decode/other.cpp"
-        "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
-        "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/shift.cpp"
-        "${VIDEO_CORE}/shader/decode/video.cpp"
-        "${VIDEO_CORE}/shader/decode/warp.cpp"
-        "${VIDEO_CORE}/shader/decode/xmad.cpp"
-        "${VIDEO_CORE}/shader/ast.cpp"
-        "${VIDEO_CORE}/shader/ast.h"
-        "${VIDEO_CORE}/shader/compiler_settings.cpp"
-        "${VIDEO_CORE}/shader/compiler_settings.h"
-        "${VIDEO_CORE}/shader/control_flow.cpp"
-        "${VIDEO_CORE}/shader/control_flow.h"
-        "${VIDEO_CORE}/shader/decode.cpp"
-        "${VIDEO_CORE}/shader/expr.cpp"
-        "${VIDEO_CORE}/shader/expr.h"
-        "${VIDEO_CORE}/shader/node.h"
-        "${VIDEO_CORE}/shader/node_helper.cpp"
-        "${VIDEO_CORE}/shader/node_helper.h"
-        "${VIDEO_CORE}/shader/registry.cpp"
-        "${VIDEO_CORE}/shader/registry.h"
-        "${VIDEO_CORE}/shader/shader_ir.cpp"
-        "${VIDEO_CORE}/shader/shader_ir.h"
-        "${VIDEO_CORE}/shader/track.cpp"
-        "${VIDEO_CORE}/shader/transform_feedback.cpp"
-        "${VIDEO_CORE}/shader/transform_feedback.h"
+        # ...
         # and also check that the scm_rev files haven't changed
         "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
         "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e4de55f4d..c5ce71706 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,7 +29,6 @@ add_library(video_core STATIC
     dirty_flags.h
     dma_pusher.cpp
     dma_pusher.h
-    engines/const_buffer_engine_interface.h
     engines/const_buffer_info.h
     engines/engine_interface.h
     engines/engine_upload.cpp
@@ -61,8 +60,6 @@ add_library(video_core STATIC
     gpu.h
     gpu_thread.cpp
     gpu_thread.h
-    guest_driver.cpp
-    guest_driver.h
     memory_manager.cpp
     memory_manager.h
     query_cache.h
@@ -71,8 +68,6 @@ add_library(video_core STATIC
     rasterizer_interface.h
     renderer_base.cpp
     renderer_base.h
-    renderer_opengl/gl_arb_decompiler.cpp
-    renderer_opengl/gl_arb_decompiler.h
     renderer_opengl/gl_buffer_cache.cpp
     renderer_opengl/gl_buffer_cache.h
     renderer_opengl/gl_device.cpp
@@ -85,10 +80,6 @@ add_library(video_core STATIC
     renderer_opengl/gl_resource_manager.h
     renderer_opengl/gl_shader_cache.cpp
     renderer_opengl/gl_shader_cache.h
-    renderer_opengl/gl_shader_decompiler.cpp
-    renderer_opengl/gl_shader_decompiler.h
-    renderer_opengl/gl_shader_disk_cache.cpp
-    renderer_opengl/gl_shader_disk_cache.h
     renderer_opengl/gl_shader_manager.cpp
     renderer_opengl/gl_shader_manager.h
     renderer_opengl/gl_shader_util.cpp
@@ -128,8 +119,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_descriptor_pool.h
     renderer_vulkan/vk_fence_manager.cpp
     renderer_vulkan/vk_fence_manager.h
-    renderer_vulkan/vk_graphics_pipeline.cpp
-    renderer_vulkan/vk_graphics_pipeline.h
     renderer_vulkan/vk_master_semaphore.cpp
     renderer_vulkan/vk_master_semaphore.h
     renderer_vulkan/vk_pipeline_cache.cpp
@@ -142,8 +131,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_resource_pool.h
     renderer_vulkan/vk_scheduler.cpp
     renderer_vulkan/vk_scheduler.h
-    renderer_vulkan/vk_shader_decompiler.cpp
-    renderer_vulkan/vk_shader_decompiler.h
     renderer_vulkan/vk_shader_util.cpp
     renderer_vulkan/vk_shader_util.h
     renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -159,57 +146,6 @@ add_library(video_core STATIC
     shader_cache.h
     shader_notify.cpp
     shader_notify.h
-    shader/decode/arithmetic.cpp
-    shader/decode/arithmetic_immediate.cpp
-    shader/decode/bfe.cpp
-    shader/decode/bfi.cpp
-    shader/decode/shift.cpp
-    shader/decode/arithmetic_integer.cpp
-    shader/decode/arithmetic_integer_immediate.cpp
-    shader/decode/arithmetic_half.cpp
-    shader/decode/arithmetic_half_immediate.cpp
-    shader/decode/ffma.cpp
-    shader/decode/hfma2.cpp
-    shader/decode/conversion.cpp
-    shader/decode/memory.cpp
-    shader/decode/texture.cpp
-    shader/decode/image.cpp
-    shader/decode/float_set_predicate.cpp
-    shader/decode/integer_set_predicate.cpp
-    shader/decode/half_set_predicate.cpp
-    shader/decode/predicate_set_register.cpp
-    shader/decode/predicate_set_predicate.cpp
-    shader/decode/register_set_predicate.cpp
-    shader/decode/float_set.cpp
-    shader/decode/integer_set.cpp
-    shader/decode/half_set.cpp
-    shader/decode/video.cpp
-    shader/decode/warp.cpp
-    shader/decode/xmad.cpp
-    shader/decode/other.cpp
-    shader/ast.cpp
-    shader/ast.h
-    shader/async_shaders.cpp
-    shader/async_shaders.h
-    shader/compiler_settings.cpp
-    shader/compiler_settings.h
-    shader/control_flow.cpp
-    shader/control_flow.h
-    shader/decode.cpp
-    shader/expr.cpp
-    shader/expr.h
-    shader/memory_util.cpp
-    shader/memory_util.h
-    shader/node_helper.cpp
-    shader/node_helper.h
-    shader/node.h
-    shader/registry.cpp
-    shader/registry.h
-    shader/shader_ir.cpp
-    shader/shader_ir.h
-    shader/track.cpp
-    shader/transform_feedback.cpp
-    shader/transform_feedback.h
     surface.cpp
     surface.h
     texture_cache/accelerated_swizzle.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
deleted file mode 100644
index f46e81bb7..000000000
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <type_traits>
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-#include "video_core/textures/texture.h"
-
-namespace Tegra::Engines {
-
-struct SamplerDescriptor {
-    union {
-        u32 raw = 0;
-        BitField<0, 2, Tegra::Shader::TextureType> texture_type;
-        BitField<2, 3, Tegra::Texture::ComponentType> r_type;
-        BitField<5, 1, u32> is_array;
-        BitField<6, 1, u32> is_buffer;
-        BitField<7, 1, u32> is_shadow;
-        BitField<8, 3, Tegra::Texture::ComponentType> g_type;
-        BitField<11, 3, Tegra::Texture::ComponentType> b_type;
-        BitField<14, 3, Tegra::Texture::ComponentType> a_type;
-        BitField<17, 7, Tegra::Texture::TextureFormat> format;
-    };
-
-    bool operator==(const SamplerDescriptor& rhs) const noexcept {
-        return raw == rhs.raw;
-    }
-
-    bool operator!=(const SamplerDescriptor& rhs) const noexcept {
-        return !operator==(rhs);
-    }
-
-    static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
-        using Tegra::Shader::TextureType;
-        SamplerDescriptor result;
-
-        result.format.Assign(tic.format.Value());
-        result.r_type.Assign(tic.r_type.Value());
-        result.g_type.Assign(tic.g_type.Value());
-        result.b_type.Assign(tic.b_type.Value());
-        result.a_type.Assign(tic.a_type.Value());
-
-        switch (tic.texture_type.Value()) {
-        case Tegra::Texture::TextureType::Texture1D:
-            result.texture_type.Assign(TextureType::Texture1D);
-            return result;
-        case Tegra::Texture::TextureType::Texture2D:
-            result.texture_type.Assign(TextureType::Texture2D);
-            return result;
-        case Tegra::Texture::TextureType::Texture3D:
-            result.texture_type.Assign(TextureType::Texture3D);
-            return result;
-        case Tegra::Texture::TextureType::TextureCubemap:
-            result.texture_type.Assign(TextureType::TextureCube);
-            return result;
-        case Tegra::Texture::TextureType::Texture1DArray:
-            result.texture_type.Assign(TextureType::Texture1D);
-            result.is_array.Assign(1);
-            return result;
-        case Tegra::Texture::TextureType::Texture2DArray:
-            result.texture_type.Assign(TextureType::Texture2D);
-            result.is_array.Assign(1);
-            return result;
-        case Tegra::Texture::TextureType::Texture1DBuffer:
-            result.texture_type.Assign(TextureType::Texture1D);
-            result.is_buffer.Assign(1);
-            return result;
-        case Tegra::Texture::TextureType::Texture2DNoMipmap:
-            result.texture_type.Assign(TextureType::Texture2D);
-            return result;
-        case Tegra::Texture::TextureType::TextureCubeArray:
-            result.texture_type.Assign(TextureType::TextureCube);
-            result.is_array.Assign(1);
-            return result;
-        default:
-            result.texture_type.Assign(TextureType::Texture2D);
-            return result;
-        }
-    }
-};
-static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
-
-class ConstBufferEngineInterface {
-public:
-    virtual ~ConstBufferEngineInterface() = default;
-    virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
-    virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
-    virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                                    u64 offset) const = 0;
-    virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
-    virtual u32 GetBoundBuffer() const = 0;
-
-    virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
-    virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
-};
-
-} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a9b75091e..cae93c470 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
     }
 }
 
-u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
-    ASSERT(stage == ShaderType::Compute);
-    const auto& buffer = launch_description.const_buffer_config[const_buffer];
-    u32 result;
-    std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
-    return result;
-}
-
-SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
-    return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
-}
-
-SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                                       u64 offset) const {
-    ASSERT(stage == ShaderType::Compute);
-    const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
-    const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
-    return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}
-
-SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
-    const Texture::TextureHandle tex_handle{handle};
-    const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
-    const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
-
-    SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
-    result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
-    return result;
-}
-
-VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
-    return rasterizer->AccessGuestDriverProfile();
-}
-
-const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
-    return rasterizer->AccessGuestDriverProfile();
-}
-
 void KeplerCompute::ProcessLaunch() {
     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
                                    LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
-
-    const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
-    LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
-
-    rasterizer->DispatchCompute(code_addr);
+    rasterizer->DispatchCompute();
 }
 
 Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7c40cba38..0d7683c2d 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,7 +10,6 @@
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/engine_interface.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/engines/shader_type.h"
@@ -40,7 +39,7 @@ namespace Tegra::Engines {
 #define KEPLER_COMPUTE_REG_INDEX(field_name) \
     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
 
-class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
+class KeplerCompute final : public EngineInterface {
 public:
     explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
     ~KeplerCompute();
@@ -209,23 +208,6 @@ public:
     void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
                          u32 methods_pending) override;
 
-    u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
-
-    SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
-
-    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                            u64 offset) const override;
-
-    SamplerDescriptor AccessSampler(u32 handle) const override;
-
-    u32 GetBoundBuffer() const override {
-        return regs.tex_cb_index;
-    }
-
-    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
-
-    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
-
 private:
     void ProcessLaunch();
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index aab6b8f7a..103a51fd0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() {
     rasterizer->Clear();
 }
 
-u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
-    ASSERT(stage != ShaderType::Compute);
-    const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
-    const auto& buffer = shader_stage.const_buffers[const_buffer];
-    return memory_manager.Read<u32>(buffer.address + offset);
-}
-
-SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
-    return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
-}
-
-SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                                   u64 offset) const {
-    ASSERT(stage != ShaderType::Compute);
-    const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
-    const auto& tex_info_buffer = shader.const_buffers[const_buffer];
-    const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
-    return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}
-
-SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
-    const Texture::TextureHandle tex_handle{handle};
-    const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
-    const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
-
-    SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
-    result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
-    return result;
-}
-
-VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
-    return rasterizer->AccessGuestDriverProfile();
-}
-
-const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
-    return rasterizer->AccessGuestDriverProfile();
-}
-
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 335383955..cbf94412b 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -17,7 +17,6 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/const_buffer_info.h"
 #include "video_core/engines/engine_interface.h"
 #include "video_core/engines/engine_upload.h"
@@ -49,7 +48,7 @@ namespace Tegra::Engines {
 #define MAXWELL3D_REG_INDEX(field_name) \
     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
 
-class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
+class Maxwell3D final : public EngineInterface {
 public:
     explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
     ~Maxwell3D();
@@ -1424,23 +1423,6 @@ public:
 
     void FlushMMEInlineDraw();
 
-    u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
-
-    SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
-
-    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                            u64 offset) const override;
-
-    SamplerDescriptor AccessSampler(u32 handle) const override;
-
-    u32 GetBoundBuffer() const override {
-        return regs.tex_cb_index;
-    }
-
-    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
-
-    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
-
     bool ShouldExecute() const {
         return execute_on;
     }
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
deleted file mode 100644
index f058f2744..000000000
--- a/src/video_core/guest_driver.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <limits>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/guest_driver.h"
-
-namespace VideoCore {
-
-void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
-    if (texture_handler_size) {
-        return;
-    }
-    const std::size_t size = bound_offsets.size();
-    if (size < 2) {
-        return;
-    }
-    std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
-    u32 min_val = std::numeric_limits<u32>::max();
-    for (std::size_t i = 1; i < size; ++i) {
-        if (bound_offsets[i] == bound_offsets[i - 1]) {
-            continue;
-        }
-        const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
-        min_val = std::min(min_val, new_min);
-    }
-    if (min_val > 2) {
-        return;
-    }
-    texture_handler_size = min_texture_handler_size * min_val;
-}
-
-} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
deleted file mode 100644
index 21e569ba1..000000000
--- a/src/video_core/guest_driver.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <vector>
-
-#include "common/common_types.h"
-
-namespace VideoCore {
-
-/**
- * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
- * information necessary for impossible to avoid HLE methods like shader tracks as they are
- * Entscheidungsproblems.
- */
-class GuestDriverProfile {
-public:
-    explicit GuestDriverProfile() = default;
-    explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
-        : texture_handler_size{texture_handler_size_} {}
-
-    void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
-
-    u32 GetTextureHandlerSize() const {
-        return texture_handler_size.value_or(default_texture_handler_size);
-    }
-
-    bool IsTextureHandlerSizeKnown() const {
-        return texture_handler_size.has_value();
-    }
-
-private:
-    // Minimum size of texture handler any driver can use.
-    static constexpr u32 min_texture_handler_size = 4;
-
-    // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
-    // Thus, certain drivers may squish the size.
-    static constexpr u32 default_texture_handler_size = 8;
-
-    std::optional<u32> texture_handler_size = default_texture_handler_size;
-};
-
-} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 58014c1c3..b094fc064 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -11,7 +11,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
-#include "video_core/guest_driver.h"
 
 namespace Tegra {
 class MemoryManager;
@@ -45,7 +44,7 @@ public:
     virtual void Clear() = 0;
 
     /// Dispatches a compute shader invocation
-    virtual void DispatchCompute(GPUVAddr code_addr) = 0;
+    virtual void DispatchCompute() = 0;
 
     /// Resets the counter of a query
     virtual void ResetCounter(QueryType type) = 0;
@@ -136,18 +135,5 @@ public:
     /// Initialize disk cached resources for the game being emulated
     virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                                    const DiskResourceLoadCallback& callback) {}
-
-    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
-    [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
-        return guest_driver_profile;
-    }
-
-    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
-    [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
-        return guest_driver_profile;
-    }
-
-private:
-    GuestDriverProfile guest_driver_profile{};
 };
 } // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
deleted file mode 100644
index e8d8d2aa5..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ /dev/null
@@ -1,2124 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
-// GLASM lacks booleans, so they have to be implemented as integers.
-// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
-// select between two values, because -1 will be evaluated as true and 0 as false.
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using namespace VideoCommon::Shader;
-using Operation = const OperationNode&;
-
-constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
-
-char Swizzle(std::size_t component) {
-    static constexpr std::string_view SWIZZLE{"xyzw"};
-    return SWIZZLE.at(component);
-}
-
-constexpr bool IsGenericAttribute(Attribute::Index index) {
-    return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
-}
-
-u32 GetGenericAttributeIndex(Attribute::Index index) {
-    ASSERT(IsGenericAttribute(index));
-    return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-std::string_view Modifiers(Operation operation) {
-    const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
-    if (meta && meta->precise) {
-        return ".PREC";
-    }
-    return "";
-}
-
-std::string_view GetInputFlags(PixelImap attribute) {
-    switch (attribute) {
-    case PixelImap::Perspective:
-        return "";
-    case PixelImap::Constant:
-        return "FLAT ";
-    case PixelImap::ScreenLinear:
-        return "NOPERSPECTIVE ";
-    case PixelImap::Unused:
-        break;
-    }
-    UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
-    return {};
-}
-
-std::string_view ImageType(Tegra::Shader::ImageType image_type) {
-    switch (image_type) {
-    case Tegra::Shader::ImageType::Texture1D:
-        return "1D";
-    case Tegra::Shader::ImageType::TextureBuffer:
-        return "BUFFER";
-    case Tegra::Shader::ImageType::Texture1DArray:
-        return "ARRAY1D";
-    case Tegra::Shader::ImageType::Texture2D:
-        return "2D";
-    case Tegra::Shader::ImageType::Texture2DArray:
-        return "ARRAY2D";
-    case Tegra::Shader::ImageType::Texture3D:
-        return "3D";
-    }
-    UNREACHABLE();
-    return {};
-}
-
-std::string_view StackName(MetaStackClass stack) {
-    switch (stack) {
-    case MetaStackClass::Ssy:
-        return "SSY";
-    case MetaStackClass::Pbk:
-        return "PBK";
-    }
-    UNREACHABLE();
-    return "";
-};
-
-std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
-    switch (topology) {
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
-        return "POINTS";
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
-        return "LINES";
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
-        return "LINES_ADJACENCY";
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
-        return "TRIANGLES";
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
-    case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
-        return "TRIANGLES_ADJACENCY";
-    default:
-        UNIMPLEMENTED_MSG("topology={}", topology);
-        return "POINTS";
-    }
-}
-
-std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
-    switch (topology) {
-    case Tegra::Shader::OutputTopology::PointList:
-        return "POINTS";
-    case Tegra::Shader::OutputTopology::LineStrip:
-        return "LINE_STRIP";
-    case Tegra::Shader::OutputTopology::TriangleStrip:
-        return "TRIANGLE_STRIP";
-    default:
-        UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
-        return "points";
-    }
-}
-
-std::string_view StageInputName(ShaderType stage) {
-    switch (stage) {
-    case ShaderType::Vertex:
-    case ShaderType::Geometry:
-        return "vertex";
-    case ShaderType::Fragment:
-        return "fragment";
-    case ShaderType::Compute:
-        return "invocation";
-    default:
-        UNREACHABLE();
-        return "";
-    }
-}
-
-std::string TextureType(const MetaTexture& meta) {
-    if (meta.sampler.is_buffer) {
-        return "BUFFER";
-    }
-    std::string type;
-    if (meta.sampler.is_shadow) {
-        type += "SHADOW";
-    }
-    if (meta.sampler.is_array) {
-        type += "ARRAY";
-    }
-    type += [&meta] {
-        switch (meta.sampler.type) {
-        case Tegra::Shader::TextureType::Texture1D:
-            return "1D";
-        case Tegra::Shader::TextureType::Texture2D:
-            return "2D";
-        case Tegra::Shader::TextureType::Texture3D:
-            return "3D";
-        case Tegra::Shader::TextureType::TextureCube:
-            return "CUBE";
-        }
-        UNREACHABLE();
-        return "2D";
-    }();
-    return type;
-}
-
-class ARBDecompiler final {
-public:
-    explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
-                           ShaderType stage_, std::string_view identifier);
-
-    std::string Code() const {
-        return shader_source;
-    }
-
-private:
-    void DefineGlobalMemory();
-
-    void DeclareHeader();
-    void DeclareVertex();
-    void DeclareGeometry();
-    void DeclareFragment();
-    void DeclareCompute();
-    void DeclareInputAttributes();
-    void DeclareOutputAttributes();
-    void DeclareLocalMemory();
-    void DeclareGlobalMemory();
-    void DeclareConstantBuffers();
-    void DeclareRegisters();
-    void DeclareTemporaries();
-    void DeclarePredicates();
-    void DeclareInternalFlags();
-
-    void InitializeVariables();
-
-    void DecompileAST();
-    void DecompileBranchMode();
-
-    void VisitAST(const ASTNode& node);
-    std::string VisitExpression(const Expr& node);
-
-    void VisitBlock(const NodeBlock& bb);
-
-    std::string Visit(const Node& node);
-
-    std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
-    std::string BuildAoffi(Operation);
-    std::string GlobalMemoryPointer(const GmemNode& gmem);
-    void Exit();
-
-    std::string Assign(Operation);
-    std::string Select(Operation);
-    std::string FClamp(Operation);
-    std::string FCastHalf0(Operation);
-    std::string FCastHalf1(Operation);
-    std::string FSqrt(Operation);
-    std::string FSwizzleAdd(Operation);
-    std::string HAdd2(Operation);
-    std::string HMul2(Operation);
-    std::string HFma2(Operation);
-    std::string HAbsolute(Operation);
-    std::string HNegate(Operation);
-    std::string HClamp(Operation);
-    std::string HCastFloat(Operation);
-    std::string HUnpack(Operation);
-    std::string HMergeF32(Operation);
-    std::string HMergeH0(Operation);
-    std::string HMergeH1(Operation);
-    std::string HPack2(Operation);
-    std::string LogicalAssign(Operation);
-    std::string LogicalPick2(Operation);
-    std::string LogicalAnd2(Operation);
-    std::string FloatOrdered(Operation);
-    std::string FloatUnordered(Operation);
-    std::string LogicalAddCarry(Operation);
-    std::string Texture(Operation);
-    std::string TextureGather(Operation);
-    std::string TextureQueryDimensions(Operation);
-    std::string TextureQueryLod(Operation);
-    std::string TexelFetch(Operation);
-    std::string TextureGradient(Operation);
-    std::string ImageLoad(Operation);
-    std::string ImageStore(Operation);
-    std::string Branch(Operation);
-    std::string BranchIndirect(Operation);
-    std::string PushFlowStack(Operation);
-    std::string PopFlowStack(Operation);
-    std::string Exit(Operation);
-    std::string Discard(Operation);
-    std::string EmitVertex(Operation);
-    std::string EndPrimitive(Operation);
-    std::string InvocationId(Operation);
-    std::string YNegate(Operation);
-    std::string ThreadId(Operation);
-    std::string ShuffleIndexed(Operation);
-    std::string Barrier(Operation);
-    std::string MemoryBarrierGroup(Operation);
-    std::string MemoryBarrierGlobal(Operation);
-
-    template <const std::string_view& op>
-    std::string Unary(Operation operation) {
-        std::string temporary = AllocTemporary();
-        AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
-        return temporary;
-    }
-
-    template <const std::string_view& op>
-    std::string Binary(Operation operation) {
-        std::string temporary = AllocTemporary();
-        AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
-                Visit(operation[1]));
-        return temporary;
-    }
-
-    template <const std::string_view& op>
-    std::string Trinary(Operation operation) {
-        std::string temporary = AllocTemporary();
-        AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
-                Visit(operation[1]), Visit(operation[2]));
-        return temporary;
-    }
-
-    template <const std::string_view& op, bool unordered>
-    std::string FloatComparison(Operation operation) {
-        std::string temporary = AllocTemporary();
-        AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
-        AddLine("MOV.S {}, 0;", temporary);
-        AddLine("MOV.S {} (NE.x), -1;", temporary);
-
-        const std::string op_a = Visit(operation[0]);
-        const std::string op_b = Visit(operation[1]);
-        if constexpr (unordered) {
-            AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
-            AddLine("TRUNC.U.CC RC.x, RC.x;");
-            AddLine("MOV.S {} (NE.x), -1;", temporary);
-            AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
-            AddLine("TRUNC.U.CC RC.x, RC.x;");
-            AddLine("MOV.S {} (NE.x), -1;", temporary);
-        } else if (op == SNE_F) {
-            AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
-            AddLine("TRUNC.U.CC RC.x, RC.x;");
-            AddLine("MOV.S {} (NE.x), 0;", temporary);
-            AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
-            AddLine("TRUNC.U.CC RC.x, RC.x;");
-            AddLine("MOV.S {} (NE.x), 0;", temporary);
-        }
-        return temporary;
-    }
-
-    template <const std::string_view& op, bool is_nan>
-    std::string HalfComparison(Operation operation) {
-        std::string tmp1 = AllocVectorTemporary();
-        const std::string tmp2 = AllocVectorTemporary();
-        const std::string op_a = Visit(operation[0]);
-        const std::string op_b = Visit(operation[1]);
-        AddLine("UP2H.F {}, {};", tmp1, op_a);
-        AddLine("UP2H.F {}, {};", tmp2, op_b);
-        AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
-        AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
-        AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
-        AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
-        AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
-        if constexpr (is_nan) {
-            AddLine("MOVC.F RC.x, {};", op_a);
-            AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
-            AddLine("MOVC.F RC.x, {};", op_b);
-            AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
-        }
-        return tmp1;
-    }
-
-    template <const std::string_view& op, const std::string_view& type>
-    std::string AtomicImage(Operation operation) {
-        const auto& meta = std::get<MetaImage>(operation.GetMeta());
-        const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
-        const std::size_t num_coords = operation.GetOperandsCount();
-        const std::size_t num_values = meta.values.size();
-
-        const std::string coord = AllocVectorTemporary();
-        const std::string value = AllocVectorTemporary();
-        for (std::size_t i = 0; i < num_coords; ++i) {
-            AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
-        }
-        for (std::size_t i = 0; i < num_values; ++i) {
-            AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
-        }
-
-        AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
-                image_id, ImageType(meta.image.type));
-        return fmt::format("{}.x", coord);
-    }
-
-    template <const std::string_view& op, const std::string_view& type>
-    std::string Atomic(Operation operation) {
-        std::string temporary = AllocTemporary();
-        std::string address;
-        std::string_view opname;
-        bool robust = false;
-        if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
-            address = GlobalMemoryPointer(*gmem);
-            opname = "ATOM";
-            robust = true;
-        } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
-            address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
-            opname = "ATOMS";
-        } else {
-            UNREACHABLE();
-            return "{0, 0, 0, 0}";
-        }
-        if (robust) {
-            AddLine("IF NE.x;");
-        }
-        AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
-        if (robust) {
-            AddLine("ELSE;");
-            AddLine("MOV.S {}, 0;", temporary);
-            AddLine("ENDIF;");
-        }
-        return temporary;
-    }
-
-    template <char type>
-    std::string Negate(Operation operation) {
-        std::string temporary = AllocTemporary();
-        if constexpr (type == 'F') {
-            AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
-        } else {
-            AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
-        }
-        return temporary;
-    }
-
-    template <char type>
-    std::string Absolute(Operation operation) {
-        std::string temporary = AllocTemporary();
-        AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
-        return temporary;
-    }
-
-    template <char type>
-    std::string BitfieldInsert(Operation operation) {
-        const std::string temporary = AllocVectorTemporary();
-        AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
-        AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
-        AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
-                Visit(operation[0]));
-        return fmt::format("{}.x", temporary);
-    }
-
-    template <char type>
-    std::string BitfieldExtract(Operation operation) {
-        const std::string temporary = AllocVectorTemporary();
-        AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
-        AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
-        AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
-        return fmt::format("{}.x", temporary);
-    }
-
-    template <char swizzle>
-    std::string LocalInvocationId(Operation) {
-        return fmt::format("invocation.localid.{}", swizzle);
-    }
-
-    template <char swizzle>
-    std::string WorkGroupId(Operation) {
-        return fmt::format("invocation.groupid.{}", swizzle);
-    }
-
-    template <char c1, char c2>
-    std::string ThreadMask(Operation) {
-        return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
-    }
-
-    template <typename... Args>
-    void AddExpression(std::string_view text, Args&&... args) {
-        shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...);
-    }
-
-    template <typename... Args>
-    void AddLine(std::string_view text, Args&&... args) {
-        AddExpression(text, std::forward<Args>(args)...);
-        shader_source += '\n';
-    }
-
-    std::string AllocLongVectorTemporary() {
-        max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
-        return fmt::format("L{}", num_long_temporaries++);
-    }
-
-    std::string AllocLongTemporary() {
-        return fmt::format("{}.x", AllocLongVectorTemporary());
-    }
-
-    std::string AllocVectorTemporary() {
-        max_temporaries = std::max(max_temporaries, num_temporaries + 1);
-        return fmt::format("T{}", num_temporaries++);
-    }
-
-    std::string AllocTemporary() {
-        return fmt::format("{}.x", AllocVectorTemporary());
-    }
-
-    void ResetTemporaries() noexcept {
-        num_temporaries = 0;
-        num_long_temporaries = 0;
-    }
-
-    const Device& device;
-    const ShaderIR& ir;
-    const Registry& registry;
-    const ShaderType stage;
-
-    std::size_t num_temporaries = 0;
-    std::size_t max_temporaries = 0;
-
-    std::size_t num_long_temporaries = 0;
-    std::size_t max_long_temporaries = 0;
-
-    std::map<GlobalMemoryBase, u32> global_memory_names;
-
-    std::string shader_source;
-
-    static constexpr std::string_view ADD_F32 = "ADD.F32";
-    static constexpr std::string_view ADD_S = "ADD.S";
-    static constexpr std::string_view ADD_U = "ADD.U";
-    static constexpr std::string_view MUL_F32 = "MUL.F32";
-    static constexpr std::string_view MUL_S = "MUL.S";
-    static constexpr std::string_view MUL_U = "MUL.U";
-    static constexpr std::string_view DIV_F32 = "DIV.F32";
-    static constexpr std::string_view DIV_S = "DIV.S";
-    static constexpr std::string_view DIV_U = "DIV.U";
-    static constexpr std::string_view MAD_F32 = "MAD.F32";
-    static constexpr std::string_view RSQ_F32 = "RSQ.F32";
-    static constexpr std::string_view COS_F32 = "COS.F32";
-    static constexpr std::string_view SIN_F32 = "SIN.F32";
-    static constexpr std::string_view EX2_F32 = "EX2.F32";
-    static constexpr std::string_view LG2_F32 = "LG2.F32";
-    static constexpr std::string_view SLT_F = "SLT.F32";
-    static constexpr std::string_view SLT_S = "SLT.S";
-    static constexpr std::string_view SLT_U = "SLT.U";
-    static constexpr std::string_view SEQ_F = "SEQ.F32";
-    static constexpr std::string_view SEQ_S = "SEQ.S";
-    static constexpr std::string_view SEQ_U = "SEQ.U";
-    static constexpr std::string_view SLE_F = "SLE.F32";
-    static constexpr std::string_view SLE_S = "SLE.S";
-    static constexpr std::string_view SLE_U = "SLE.U";
-    static constexpr std::string_view SGT_F = "SGT.F32";
-    static constexpr std::string_view SGT_S = "SGT.S";
-    static constexpr std::string_view SGT_U = "SGT.U";
-    static constexpr std::string_view SNE_F = "SNE.F32";
-    static constexpr std::string_view SNE_S = "SNE.S";
-    static constexpr std::string_view SNE_U = "SNE.U";
-    static constexpr std::string_view SGE_F = "SGE.F32";
-    static constexpr std::string_view SGE_S = "SGE.S";
-    static constexpr std::string_view SGE_U = "SGE.U";
-    static constexpr std::string_view AND_S = "AND.S";
-    static constexpr std::string_view AND_U = "AND.U";
-    static constexpr std::string_view TRUNC_F = "TRUNC.F";
-    static constexpr std::string_view TRUNC_S = "TRUNC.S";
-    static constexpr std::string_view TRUNC_U = "TRUNC.U";
-    static constexpr std::string_view SHL_S = "SHL.S";
-    static constexpr std::string_view SHL_U = "SHL.U";
-    static constexpr std::string_view SHR_S = "SHR.S";
-    static constexpr std::string_view SHR_U = "SHR.U";
-    static constexpr std::string_view OR_S = "OR.S";
-    static constexpr std::string_view OR_U = "OR.U";
-    static constexpr std::string_view XOR_S = "XOR.S";
-    static constexpr std::string_view XOR_U = "XOR.U";
-    static constexpr std::string_view NOT_S = "NOT.S";
-    static constexpr std::string_view NOT_U = "NOT.U";
-    static constexpr std::string_view BTC_S = "BTC.S";
-    static constexpr std::string_view BTC_U = "BTC.U";
-    static constexpr std::string_view BTFM_S = "BTFM.S";
-    static constexpr std::string_view BTFM_U = "BTFM.U";
-    static constexpr std::string_view ROUND_F = "ROUND.F";
-    static constexpr std::string_view CEIL_F = "CEIL.F";
-    static constexpr std::string_view FLR_F = "FLR.F";
-    static constexpr std::string_view I2F_S = "I2F.S";
-    static constexpr std::string_view I2F_U = "I2F.U";
-    static constexpr std::string_view MIN_F = "MIN.F";
-    static constexpr std::string_view MIN_S = "MIN.S";
-    static constexpr std::string_view MIN_U = "MIN.U";
-    static constexpr std::string_view MAX_F = "MAX.F";
-    static constexpr std::string_view MAX_S = "MAX.S";
-    static constexpr std::string_view MAX_U = "MAX.U";
-    static constexpr std::string_view MOV_U = "MOV.U";
-    static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
-    static constexpr std::string_view TGALL_U = "TGALL.U";
-    static constexpr std::string_view TGANY_U = "TGANY.U";
-    static constexpr std::string_view TGEQ_U = "TGEQ.U";
-    static constexpr std::string_view EXCH = "EXCH";
-    static constexpr std::string_view ADD = "ADD";
-    static constexpr std::string_view MIN = "MIN";
-    static constexpr std::string_view MAX = "MAX";
-    static constexpr std::string_view AND = "AND";
-    static constexpr std::string_view OR = "OR";
-    static constexpr std::string_view XOR = "XOR";
-    static constexpr std::string_view U32 = "U32";
-    static constexpr std::string_view S32 = "S32";
-
-    static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
-    using DecompilerType = std::string (ARBDecompiler::*)(Operation);
-    static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
-        &ARBDecompiler::Assign,
-
-        &ARBDecompiler::Select,
-
-        &ARBDecompiler::Binary<ADD_F32>,
-        &ARBDecompiler::Binary<MUL_F32>,
-        &ARBDecompiler::Binary<DIV_F32>,
-        &ARBDecompiler::Trinary<MAD_F32>,
-        &ARBDecompiler::Negate<'F'>,
-        &ARBDecompiler::Absolute<'F'>,
-        &ARBDecompiler::FClamp,
-        &ARBDecompiler::FCastHalf0,
-        &ARBDecompiler::FCastHalf1,
-        &ARBDecompiler::Binary<MIN_F>,
-        &ARBDecompiler::Binary<MAX_F>,
-        &ARBDecompiler::Unary<COS_F32>,
-        &ARBDecompiler::Unary<SIN_F32>,
-        &ARBDecompiler::Unary<EX2_F32>,
-        &ARBDecompiler::Unary<LG2_F32>,
-        &ARBDecompiler::Unary<RSQ_F32>,
-        &ARBDecompiler::FSqrt,
-        &ARBDecompiler::Unary<ROUND_F>,
-        &ARBDecompiler::Unary<FLR_F>,
-        &ARBDecompiler::Unary<CEIL_F>,
-        &ARBDecompiler::Unary<TRUNC_F>,
-        &ARBDecompiler::Unary<I2F_S>,
-        &ARBDecompiler::Unary<I2F_U>,
-        &ARBDecompiler::FSwizzleAdd,
-
-        &ARBDecompiler::Binary<ADD_S>,
-        &ARBDecompiler::Binary<MUL_S>,
-        &ARBDecompiler::Binary<DIV_S>,
-        &ARBDecompiler::Negate<'S'>,
-        &ARBDecompiler::Absolute<'S'>,
-        &ARBDecompiler::Binary<MIN_S>,
-        &ARBDecompiler::Binary<MAX_S>,
-
-        &ARBDecompiler::Unary<TRUNC_S>,
-        &ARBDecompiler::Unary<MOV_U>,
-        &ARBDecompiler::Binary<SHL_S>,
-        &ARBDecompiler::Binary<SHR_U>,
-        &ARBDecompiler::Binary<SHR_S>,
-        &ARBDecompiler::Binary<AND_S>,
-        &ARBDecompiler::Binary<OR_S>,
-        &ARBDecompiler::Binary<XOR_S>,
-        &ARBDecompiler::Unary<NOT_S>,
-        &ARBDecompiler::BitfieldInsert<'S'>,
-        &ARBDecompiler::BitfieldExtract<'S'>,
-        &ARBDecompiler::Unary<BTC_S>,
-        &ARBDecompiler::Unary<BTFM_S>,
-
-        &ARBDecompiler::Binary<ADD_U>,
-        &ARBDecompiler::Binary<MUL_U>,
-        &ARBDecompiler::Binary<DIV_U>,
-        &ARBDecompiler::Binary<MIN_U>,
-        &ARBDecompiler::Binary<MAX_U>,
-        &ARBDecompiler::Unary<TRUNC_U>,
-        &ARBDecompiler::Unary<MOV_U>,
-        &ARBDecompiler::Binary<SHL_U>,
-        &ARBDecompiler::Binary<SHR_U>,
-        &ARBDecompiler::Binary<SHR_U>,
-        &ARBDecompiler::Binary<AND_U>,
-        &ARBDecompiler::Binary<OR_U>,
-        &ARBDecompiler::Binary<XOR_U>,
-        &ARBDecompiler::Unary<NOT_U>,
-        &ARBDecompiler::BitfieldInsert<'U'>,
-        &ARBDecompiler::BitfieldExtract<'U'>,
-        &ARBDecompiler::Unary<BTC_U>,
-        &ARBDecompiler::Unary<BTFM_U>,
-
-        &ARBDecompiler::HAdd2,
-        &ARBDecompiler::HMul2,
-        &ARBDecompiler::HFma2,
-        &ARBDecompiler::HAbsolute,
-        &ARBDecompiler::HNegate,
-        &ARBDecompiler::HClamp,
-        &ARBDecompiler::HCastFloat,
-        &ARBDecompiler::HUnpack,
-        &ARBDecompiler::HMergeF32,
-        &ARBDecompiler::HMergeH0,
-        &ARBDecompiler::HMergeH1,
-        &ARBDecompiler::HPack2,
-
-        &ARBDecompiler::LogicalAssign,
-        &ARBDecompiler::Binary<AND_U>,
-        &ARBDecompiler::Binary<OR_U>,
-        &ARBDecompiler::Binary<XOR_U>,
-        &ARBDecompiler::Unary<NOT_U>,
-        &ARBDecompiler::LogicalPick2,
-        &ARBDecompiler::LogicalAnd2,
-
-        &ARBDecompiler::FloatComparison<SLT_F, false>,
-        &ARBDecompiler::FloatComparison<SEQ_F, false>,
-        &ARBDecompiler::FloatComparison<SLE_F, false>,
-        &ARBDecompiler::FloatComparison<SGT_F, false>,
-        &ARBDecompiler::FloatComparison<SNE_F, false>,
-        &ARBDecompiler::FloatComparison<SGE_F, false>,
-        &ARBDecompiler::FloatOrdered,
-        &ARBDecompiler::FloatUnordered,
-        &ARBDecompiler::FloatComparison<SLT_F, true>,
-        &ARBDecompiler::FloatComparison<SEQ_F, true>,
-        &ARBDecompiler::FloatComparison<SLE_F, true>,
-        &ARBDecompiler::FloatComparison<SGT_F, true>,
-        &ARBDecompiler::FloatComparison<SNE_F, true>,
-        &ARBDecompiler::FloatComparison<SGE_F, true>,
-
-        &ARBDecompiler::Binary<SLT_S>,
692 &ARBDecompiler::Binary<SEQ_S>,
693 &ARBDecompiler::Binary<SLE_S>,
694 &ARBDecompiler::Binary<SGT_S>,
695 &ARBDecompiler::Binary<SNE_S>,
696 &ARBDecompiler::Binary<SGE_S>,
697
698 &ARBDecompiler::Binary<SLT_U>,
699 &ARBDecompiler::Binary<SEQ_U>,
700 &ARBDecompiler::Binary<SLE_U>,
701 &ARBDecompiler::Binary<SGT_U>,
702 &ARBDecompiler::Binary<SNE_U>,
703 &ARBDecompiler::Binary<SGE_U>,
704
705 &ARBDecompiler::LogicalAddCarry,
706
707 &ARBDecompiler::HalfComparison<SLT_F, false>,
708 &ARBDecompiler::HalfComparison<SEQ_F, false>,
709 &ARBDecompiler::HalfComparison<SLE_F, false>,
710 &ARBDecompiler::HalfComparison<SGT_F, false>,
711 &ARBDecompiler::HalfComparison<SNE_F, false>,
712 &ARBDecompiler::HalfComparison<SGE_F, false>,
713 &ARBDecompiler::HalfComparison<SLT_F, true>,
714 &ARBDecompiler::HalfComparison<SEQ_F, true>,
715 &ARBDecompiler::HalfComparison<SLE_F, true>,
716 &ARBDecompiler::HalfComparison<SGT_F, true>,
717 &ARBDecompiler::HalfComparison<SNE_F, true>,
718 &ARBDecompiler::HalfComparison<SGE_F, true>,
719
720 &ARBDecompiler::Texture,
721 &ARBDecompiler::Texture,
722 &ARBDecompiler::TextureGather,
723 &ARBDecompiler::TextureQueryDimensions,
724 &ARBDecompiler::TextureQueryLod,
725 &ARBDecompiler::TexelFetch,
726 &ARBDecompiler::TextureGradient,
727
728 &ARBDecompiler::ImageLoad,
729 &ARBDecompiler::ImageStore,
730
731 &ARBDecompiler::AtomicImage<ADD, U32>,
732 &ARBDecompiler::AtomicImage<AND, U32>,
733 &ARBDecompiler::AtomicImage<OR, U32>,
734 &ARBDecompiler::AtomicImage<XOR, U32>,
735 &ARBDecompiler::AtomicImage<EXCH, U32>,
736
737 &ARBDecompiler::Atomic<EXCH, U32>,
738 &ARBDecompiler::Atomic<ADD, U32>,
739 &ARBDecompiler::Atomic<MIN, U32>,
740 &ARBDecompiler::Atomic<MAX, U32>,
741 &ARBDecompiler::Atomic<AND, U32>,
742 &ARBDecompiler::Atomic<OR, U32>,
743 &ARBDecompiler::Atomic<XOR, U32>,
744
745 &ARBDecompiler::Atomic<EXCH, S32>,
746 &ARBDecompiler::Atomic<ADD, S32>,
747 &ARBDecompiler::Atomic<MIN, S32>,
748 &ARBDecompiler::Atomic<MAX, S32>,
749 &ARBDecompiler::Atomic<AND, S32>,
750 &ARBDecompiler::Atomic<OR, S32>,
751 &ARBDecompiler::Atomic<XOR, S32>,
752
753 &ARBDecompiler::Atomic<ADD, U32>,
754 &ARBDecompiler::Atomic<MIN, U32>,
755 &ARBDecompiler::Atomic<MAX, U32>,
756 &ARBDecompiler::Atomic<AND, U32>,
757 &ARBDecompiler::Atomic<OR, U32>,
758 &ARBDecompiler::Atomic<XOR, U32>,
759
760 &ARBDecompiler::Atomic<ADD, S32>,
761 &ARBDecompiler::Atomic<MIN, S32>,
762 &ARBDecompiler::Atomic<MAX, S32>,
763 &ARBDecompiler::Atomic<AND, S32>,
764 &ARBDecompiler::Atomic<OR, S32>,
765 &ARBDecompiler::Atomic<XOR, S32>,
766
767 &ARBDecompiler::Branch,
768 &ARBDecompiler::BranchIndirect,
769 &ARBDecompiler::PushFlowStack,
770 &ARBDecompiler::PopFlowStack,
771 &ARBDecompiler::Exit,
772 &ARBDecompiler::Discard,
773
774 &ARBDecompiler::EmitVertex,
775 &ARBDecompiler::EndPrimitive,
776
777 &ARBDecompiler::InvocationId,
778 &ARBDecompiler::YNegate,
779 &ARBDecompiler::LocalInvocationId<'x'>,
780 &ARBDecompiler::LocalInvocationId<'y'>,
781 &ARBDecompiler::LocalInvocationId<'z'>,
782 &ARBDecompiler::WorkGroupId<'x'>,
783 &ARBDecompiler::WorkGroupId<'y'>,
784 &ARBDecompiler::WorkGroupId<'z'>,
785
786 &ARBDecompiler::Unary<TGBALLOT_U>,
787 &ARBDecompiler::Unary<TGALL_U>,
788 &ARBDecompiler::Unary<TGANY_U>,
789 &ARBDecompiler::Unary<TGEQ_U>,
790
791 &ARBDecompiler::ThreadId,
792 &ARBDecompiler::ThreadMask<'e', 'q'>,
793 &ARBDecompiler::ThreadMask<'g', 'e'>,
794 &ARBDecompiler::ThreadMask<'g', 't'>,
795 &ARBDecompiler::ThreadMask<'l', 'e'>,
796 &ARBDecompiler::ThreadMask<'l', 't'>,
797 &ARBDecompiler::ShuffleIndexed,
798
799 &ARBDecompiler::Barrier,
800 &ARBDecompiler::MemoryBarrierGroup,
801 &ARBDecompiler::MemoryBarrierGlobal,
802 };
803};
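// OPERATION_DECOMPILERS above is a straightforward jump table: OperationCode is a dense
// enum terminated by Amount, so an IR operation indexes directly into an array of
// pointers to member functions, and empty slots would mark unsupported operations.
// A minimal, self-contained sketch of the same technique (illustrative names only, not
// part of yuzu):
//
//     #include <array>
//     #include <cstddef>
//     #include <string>
//
//     struct Emitter {
//         std::string Add(int v) { return "ADD " + std::to_string(v); }
//         std::string Mul(int v) { return "MUL " + std::to_string(v); }
//         using Fn = std::string (Emitter::*)(int);
//         static constexpr std::array<Fn, 2> TABLE{&Emitter::Add, &Emitter::Mul};
//         std::string Emit(std::size_t op, int v) { return (this->*TABLE[op])(v); }
//     };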

ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
                             ShaderType stage_, std::string_view identifier)
    : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
    DefineGlobalMemory();

    AddLine("TEMP RC;");
    AddLine("TEMP FSWZA[4];");
    AddLine("TEMP FSWZB[4];");
    if (ir.IsDecompiled()) {
        DecompileAST();
    } else {
        DecompileBranchMode();
    }
    AddLine("END");

    const std::string code = std::move(shader_source);
    DeclareHeader();
    DeclareVertex();
    DeclareGeometry();
    DeclareFragment();
    DeclareCompute();
    DeclareInputAttributes();
    DeclareOutputAttributes();
    DeclareLocalMemory();
    DeclareGlobalMemory();
    DeclareConstantBuffers();
    DeclareRegisters();
    DeclareTemporaries();
    DeclarePredicates();
    DeclareInternalFlags();

    shader_source += code;
}
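// Note on the constructor above: the program is generated in two passes. The instruction
// body is emitted first so resource usage (registers, temporaries, predicates, global
// memory) is fully known, then moved aside; the declaration block is emitted into the
// now-empty buffer and the body is appended back. Appending to a moved-from std::string
// is legal (it is left valid but unspecified), and in practice the mainstream standard
// libraries leave it empty, which this code relies on.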

std::string_view HeaderStageName(ShaderType stage) {
    switch (stage) {
    case ShaderType::Vertex:
        return "vp";
    case ShaderType::Geometry:
        return "gp";
    case ShaderType::Fragment:
        return "fp";
    case ShaderType::Compute:
        return "cp";
    default:
        UNREACHABLE();
        return "";
    }
}

void ARBDecompiler::DefineGlobalMemory() {
    u32 binding = 0;
    for (const auto& pair : ir.GetGlobalMemory()) {
        const GlobalMemoryBase base = pair.first;
        global_memory_names.emplace(base, binding);
        ++binding;
    }
}

void ARBDecompiler::DeclareHeader() {
    AddLine("!!NV{}5.0", HeaderStageName(stage));
    // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
    AddLine("OPTION NV_internal;");
    AddLine("OPTION NV_gpu_program_fp64;");
    AddLine("OPTION NV_shader_thread_group;");
    if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
        AddLine("OPTION NV_shader_thread_shuffle;");
    }
    if (stage == ShaderType::Vertex) {
        if (device.HasNvViewportArray2()) {
            AddLine("OPTION NV_viewport_array2;");
        }
    }
    if (stage == ShaderType::Fragment) {
        AddLine("OPTION ARB_draw_buffers;");
    }
    if (device.HasImageLoadFormatted()) {
        AddLine("OPTION EXT_shader_image_load_formatted;");
    }
}

void ARBDecompiler::DeclareVertex() {
    if (stage != ShaderType::Vertex) {
        return;
    }
    AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
}

void ARBDecompiler::DeclareGeometry() {
    if (stage != ShaderType::Geometry) {
        return;
    }
    const auto& info = registry.GetGraphicsInfo();
    const auto& header = ir.GetHeader();
    AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
    AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
    AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
    AddLine("ATTRIB vertex_position = vertex.position;");
}

void ARBDecompiler::DeclareFragment() {
    if (stage != ShaderType::Fragment) {
        return;
    }
    AddLine("OUTPUT result_color7 = result.color[7];");
    AddLine("OUTPUT result_color6 = result.color[6];");
    AddLine("OUTPUT result_color5 = result.color[5];");
    AddLine("OUTPUT result_color4 = result.color[4];");
    AddLine("OUTPUT result_color3 = result.color[3];");
    AddLine("OUTPUT result_color2 = result.color[2];");
    AddLine("OUTPUT result_color1 = result.color[1];");
    AddLine("OUTPUT result_color0 = result.color;");
}

void ARBDecompiler::DeclareCompute() {
    if (stage != ShaderType::Compute) {
        return;
    }
    const ComputeInfo& info = registry.GetComputeInfo();
    AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
            info.workgroup_size[2]);
    if (info.shared_memory_size_in_words == 0) {
        return;
    }
    const u32 limit = device.GetMaxComputeSharedMemorySize();
    u32 size_in_bytes = info.shared_memory_size_in_words * 4;
    if (size_in_bytes > limit) {
        LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
                  size_in_bytes, limit);
        size_in_bytes = limit;
    }

    AddLine("SHARED_MEMORY {};", size_in_bytes);
    AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
}

void ARBDecompiler::DeclareInputAttributes() {
    if (stage == ShaderType::Compute) {
        return;
    }
    const std::string_view stage_name = StageInputName(stage);
    for (const auto attribute : ir.GetInputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);

        std::string_view suffix;
        if (stage == ShaderType::Fragment) {
            const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
            if (input_mode == PixelImap::Unused) {
                return;
            }
            suffix = GetInputFlags(input_mode);
        }
        AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
                index);
    }
}

void ARBDecompiler::DeclareOutputAttributes() {
    if (stage == ShaderType::Compute) {
        return;
    }
    for (const auto attribute : ir.GetOutputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);
        AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
    }
}

void ARBDecompiler::DeclareLocalMemory() {
    u64 size = 0;
    if (stage == ShaderType::Compute) {
        size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
    } else {
        size = ir.GetHeader().GetLocalMemorySize();
    }
    if (size == 0) {
        return;
    }
    const u64 element_count = Common::AlignUp(size, 4) / 4;
    AddLine("TEMP lmem[{}];", element_count);
}
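// Local memory is declared as 4-byte TEMP words, so the byte addresses produced by the
// IR are shifted right by two before indexing lmem[]; see the SHR.U in the LmemNode
// paths of Visit() and Assign() below.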

void ARBDecompiler::DeclareGlobalMemory() {
    const size_t num_entries = ir.GetGlobalMemory().size();
    if (num_entries > 0) {
        AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
    }
}

void ARBDecompiler::DeclareConstantBuffers() {
    u32 binding = 0;
    for (const auto& cbuf : ir.GetConstantBuffers()) {
        AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
        ++binding;
    }
}

void ARBDecompiler::DeclareRegisters() {
    for (const u32 gpr : ir.GetRegisters()) {
        AddLine("TEMP R{};", gpr);
    }
}

void ARBDecompiler::DeclareTemporaries() {
    for (std::size_t i = 0; i < max_temporaries; ++i) {
        AddLine("TEMP T{};", i);
    }
    for (std::size_t i = 0; i < max_long_temporaries; ++i) {
        AddLine("LONG TEMP L{};", i);
    }
}

void ARBDecompiler::DeclarePredicates() {
    for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
        AddLine("TEMP P{};", static_cast<u64>(pred));
    }
}

void ARBDecompiler::DeclareInternalFlags() {
    for (const char* name : INTERNAL_FLAG_NAMES) {
        AddLine("TEMP {};", name);
    }
}

void ARBDecompiler::InitializeVariables() {
    AddLine("MOV.F32 FSWZA[0], -1;");
    AddLine("MOV.F32 FSWZA[1], 1;");
    AddLine("MOV.F32 FSWZA[2], -1;");
    AddLine("MOV.F32 FSWZA[3], 0;");
    AddLine("MOV.F32 FSWZB[0], -1;");
    AddLine("MOV.F32 FSWZB[1], -1;");
    AddLine("MOV.F32 FSWZB[2], 1;");
    AddLine("MOV.F32 FSWZB[3], -1;");

    if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
        AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
    }
    for (const auto attribute : ir.GetOutputAttributes()) {
        if (!IsGenericAttribute(attribute)) {
            continue;
        }
        const u32 index = GetGenericAttributeIndex(attribute);
        AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
    }
    for (const u32 gpr : ir.GetRegisters()) {
        AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
    }
    for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
        AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
    }
}

void ARBDecompiler::DecompileAST() {
    const u32 num_flow_variables = ir.GetASTNumVariables();
    for (u32 i = 0; i < num_flow_variables; ++i) {
        AddLine("TEMP F{};", i);
    }
    for (u32 i = 0; i < num_flow_variables; ++i) {
        AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
    }

    InitializeVariables();

    VisitAST(ir.GetASTProgram());
}

void ARBDecompiler::DecompileBranchMode() {
    static constexpr u32 FLOW_STACK_SIZE = 20;
    if (!ir.IsFlowStackDisabled()) {
        AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP SSY_TOP;");
        AddLine("TEMP PBK_TOP;");
    }

    AddLine("TEMP PC;");

    if (!ir.IsFlowStackDisabled()) {
        AddLine("MOV.U SSY_TOP.x, 0;");
        AddLine("MOV.U PBK_TOP.x, 0;");
    }

    InitializeVariables();

    const auto basic_block_end = ir.GetBasicBlocks().end();
    auto basic_block_it = ir.GetBasicBlocks().begin();
    const u32 first_address = basic_block_it->first;
    AddLine("MOV.U PC.x, {};", first_address);

    AddLine("REP;");

    std::size_t num_blocks = 0;
    while (basic_block_it != basic_block_end) {
        const auto& [address, bb] = *basic_block_it;
        ++num_blocks;

        AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
        AddLine("IF NE.x;");

        VisitBlock(bb);

        ++basic_block_it;

        if (basic_block_it != basic_block_end) {
            const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
            if (!op || op->GetCode() != OperationCode::Branch) {
                const u32 next_address = basic_block_it->first;
                AddLine("MOV.U PC.x, {};", next_address);
                AddLine("CONT;");
            }
        }

        AddLine("ELSE;");
    }
    AddLine("RET;");
    while (num_blocks--) {
        AddLine("ENDIF;");
    }

    AddLine("ENDREP;");
}
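// DecompileBranchMode lowers arbitrary control flow onto a software program counter.
// For two basic blocks the generated skeleton looks roughly like this (sketch of the
// emitted assembly, reconstructed from the code above):
//
//     MOV.U PC.x, <first_address>;
//     REP;                            # dispatch loop
//     SEQ.S.CC RC.x, PC.x, <addr0>;   # is PC at block 0?
//     IF NE.x;
//     <block 0>                       # may set PC and CONT; back to the loop
//     ELSE;
//     SEQ.S.CC RC.x, PC.x, <addr1>;
//     IF NE.x;
//     <block 1>
//     ELSE;
//     RET;                            # PC matched no block: terminate
//     ENDIF; ENDIF;
//     ENDREP;
//
// Branches are simply writes to PC followed by CONT, and fallthrough between
// consecutive blocks is handled by the explicit PC update emitted when a block does not
// end in a Branch operation.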

void ARBDecompiler::VisitAST(const ASTNode& node) {
    if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(if_then->condition);
        ResetTemporaries();

        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("IF NE.x;");
        for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("ENDIF;");
    } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
        AddLine("ELSE;");
        for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
        VisitBlock(decoded->nodes);
    } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
        AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
        ResetTemporaries();
    } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
        const std::string condition = VisitExpression(do_while->condition);
        ResetTemporaries();
        AddLine("REP;");
        for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("BRK (NE.x);");
        AddLine("ENDREP;");
    } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
        const bool is_true = ExprIsTrue(ast_return->condition);
        if (!is_true) {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
            AddLine("IF NE.x;");
            ResetTemporaries();
        }
        if (ast_return->kills) {
            AddLine("KIL TR;");
        } else {
            Exit();
        }
        if (!is_true) {
            AddLine("ENDIF;");
        }
    } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
        if (ExprIsTrue(ast_break->condition)) {
            AddLine("BRK;");
        } else {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
            AddLine("BRK (NE.x);");
            ResetTemporaries();
        }
    } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
        // Nothing to do
    } else {
        UNREACHABLE();
    }
}

std::string ARBDecompiler::VisitExpression(const Expr& node) {
    if (const auto expr = std::get_if<ExprAnd>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
                VisitExpression(expr->operand2));
        return result;
    }
    if (const auto expr = std::get_if<ExprOr>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
                VisitExpression(expr->operand2));
        return result;
    }
    if (const auto expr = std::get_if<ExprNot>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
        return result;
    }
    if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
        return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
    }
    if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
        return Visit(ir.GetConditionCode(expr->cc));
    }
    if (const auto expr = std::get_if<ExprVar>(&*node)) {
        return fmt::format("F{}.x", expr->var_index);
    }
    if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
        return expr->value ? "0xffffffff" : "0";
    }
    if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
        std::string result = AllocTemporary();
        AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
        return result;
    }
    UNREACHABLE();
    return "0";
}
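// Booleans in VisitExpression are full 32-bit masks: 0xffffffff for true, 0 for false.
// That is why ExprAnd/ExprOr map onto the plain bitwise opcodes, and why ExprNot is a
// CMP.S select (a negative input picks 0, anything else picks -1) rather than a bitwise
// NOT of a 1-bit value.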

void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
    for (const auto& node : bb) {
        Visit(node);
    }
}

std::string ARBDecompiler::Visit(const Node& node) {
    if (const auto operation = std::get_if<OperationNode>(&*node)) {
        if (const auto amend_index = operation->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        const std::size_t index = static_cast<std::size_t>(operation->GetCode());
        if (index >= OPERATION_DECOMPILERS.size()) {
            UNREACHABLE_MSG("Out of bounds operation: {}", index);
            return {};
        }
        const auto decompiler = OPERATION_DECOMPILERS[index];
        if (decompiler == nullptr) {
            UNREACHABLE_MSG("Undefined operation: {}", index);
            return {};
        }
        return (this->*decompiler)(*operation);
    }

    if (const auto gpr = std::get_if<GprNode>(&*node)) {
        const u32 index = gpr->GetIndex();
        if (index == Register::ZeroIndex) {
            return "{0, 0, 0, 0}.x";
        }
        return fmt::format("R{}.x", index);
    }

    if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
        return fmt::format("CV{}.x", cv->GetIndex());
    }

    if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
        std::string temporary = AllocTemporary();
        AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
        return temporary;
    }

    if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
        std::string temporary = AllocTemporary();
        switch (const auto index = predicate->GetIndex(); index) {
        case Tegra::Shader::Pred::UnusedIndex:
            AddLine("MOV.S {}, -1;", temporary);
            break;
        case Tegra::Shader::Pred::NeverExecute:
            AddLine("MOV.S {}, 0;", temporary);
            break;
        default:
            AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
            break;
        }
        if (predicate->IsNegated()) {
            AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
        }
        return temporary;
    }

    if (const auto abuf = std::get_if<AbufNode>(&*node)) {
        if (abuf->IsPhysicalBuffer()) {
            UNIMPLEMENTED_MSG("Physical buffers are not implemented");
            return "{0, 0, 0, 0}.x";
        }

        const Attribute::Index index = abuf->GetIndex();
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (index) {
        case Attribute::Index::Position: {
            if (stage == ShaderType::Geometry) {
                return fmt::format("{}_position[{}].{}", StageInputName(stage),
                                   Visit(abuf->GetBuffer()), swizzle);
            } else {
                return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
            }
        }
        case Attribute::Index::TessCoordInstanceIDVertexID:
            ASSERT(stage == ShaderType::Vertex);
            switch (element) {
            case 2:
                return "vertex.instance";
            case 3:
                return "vertex.id";
            }
            UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
            break;
        case Attribute::Index::PointCoord:
            switch (element) {
            case 0:
                return "fragment.pointcoord.x";
            case 1:
                return "fragment.pointcoord.y";
            }
            UNIMPLEMENTED();
            break;
        case Attribute::Index::FrontFacing: {
            ASSERT(stage == ShaderType::Fragment);
            ASSERT(element == 3);
            const std::string temporary = AllocVectorTemporary();
            AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
            AddLine("MOV.U.CC RC.x, -RC;");
            AddLine("MOV.S {}.x, 0;", temporary);
            AddLine("MOV.S {}.x (NE.x), -1;", temporary);
            return fmt::format("{}.x", temporary);
        }
        default:
            if (IsGenericAttribute(index)) {
                if (stage == ShaderType::Geometry) {
                    return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
                                       Visit(abuf->GetBuffer()), swizzle);
                } else {
                    return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
                                       GetGenericAttributeIndex(index), swizzle);
                }
            }
            UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
            break;
        }
        return "{0, 0, 0, 0}.x";
    }

    if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
        std::string offset_string;
        const auto& offset = cbuf->GetOffset();
        if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
            offset_string = std::to_string(imm->GetValue());
        } else {
            offset_string = Visit(offset);
        }
        std::string temporary = AllocTemporary();
        AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
        return temporary;
    }

    if (const auto gmem = std::get_if<GmemNode>(&*node)) {
        std::string temporary = AllocTemporary();
        AddLine("MOV {}, 0;", temporary);
        AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
        return temporary;
    }

    if (const auto lmem = std::get_if<LmemNode>(&*node)) {
        std::string temporary = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", temporary, temporary);
        AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
        return temporary;
    }

    if (const auto smem = std::get_if<SmemNode>(&*node)) {
        std::string temporary = Visit(smem->GetAddress());
        AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
        return temporary;
    }

    if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
        return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
    }

    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
        if (const auto amend_index = conditional->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
        AddLine("IF NE.x;");
        VisitBlock(conditional->GetCode());
        AddLine("ENDIF;");
        return {};
    }

    if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
        // Uncommenting this will generate invalid code. GLASM lacks comments.
        // AddLine("// {}", cmt->GetText());
        return {};
    }

    UNIMPLEMENTED();
    return {};
}

std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    UNIMPLEMENTED_IF(meta.sampler.is_indexed);

    const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
                             meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
    const std::size_t count = operation.GetOperandsCount();
    std::string temporary = AllocVectorTemporary();
    std::size_t i = 0;
    for (; i < count; ++i) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    if (meta.sampler.is_array) {
        AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
        ++i;
    }
    if (meta.sampler.is_shadow) {
        std::string compare = Visit(meta.depth_compare);
        if (is_extended) {
            ASSERT(i == 4);
            std::string extra_coord = AllocVectorTemporary();
            AddLine("MOV.F {}.x, {};", extra_coord, compare);
            return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
        }
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
        ++i;
    }
    return {temporary, temporary, i};
}
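// BuildCoords returns {coordinate operand, backing register, next free component}. The
// irregular case is shadow cube-map arrays: array index + cube vector + compare value
// need five scalars, which cannot fit one 4-component register, so the compare value is
// moved to a second register and "coord, extra" is emitted as an extended two-operand
// form; the NV_internal OPTION requested in the header appears to be what makes the
// assembler accept that form.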

std::string ARBDecompiler::BuildAoffi(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    if (meta.aoffi.empty()) {
        return {};
    }
    const std::string temporary = AllocVectorTemporary();
    std::size_t i = 0;
    for (auto& node : meta.aoffi) {
        AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
    }
    return fmt::format(", offset({})", temporary);
}

std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
    // Read a bindless SSBO, return its address and set CC accordingly
    // address = c[binding].xy
    // length = c[binding].z
    const u32 binding = global_memory_names.at(gmem.GetDescriptor());

    const std::string pointer = AllocLongVectorTemporary();
    std::string temporary = AllocTemporary();

    AddLine("PK64.U {}, c[{}];", pointer, binding);
    AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
            Visit(gmem.GetBaseAddress()));
    AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
    AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
    // Compare offset to length and set CC
    AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
    return fmt::format("{}.x", pointer);
}
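// Each global memory region is backed by a driver-supplied constant: a 64-bit base
// address in c[binding].xy and the region size in c[binding].z. GlobalMemoryPointer
// packs the base with PK64.U, adds the 32-bit offset, and deliberately leaves the
// bounds check in CC so that callers predicate the actual LOAD/STORE with (NE.x) or an
// IF NE.x block, turning out-of-range accesses into no-ops instead of faults.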

void ARBDecompiler::Exit() {
    if (stage != ShaderType::Fragment) {
        AddLine("RET;");
        return;
    }

    const auto safe_get_register = [this](u32 reg) -> std::string {
        if (ir.GetRegisters().contains(reg)) {
            return fmt::format("R{}.x", reg);
        }
        return "{0, 0, 0, 0}.x";
    };

    const auto& header = ir.GetHeader();
    u32 current_reg = 0;
    for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
        for (u32 component = 0; component < 4; ++component) {
            if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
                continue;
            }
            AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
                    safe_get_register(current_reg));
            ++current_reg;
        }
    }
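    // Writing depth from current_reg + 1 mirrors the GLSL decompiler, which notes that
    // the depth output lives two registers past the last color output (current_reg is
    // already one past it). Treat the exact offset as an inherited assumption.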
    if (header.ps.omap.depth) {
        AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
    }

    AddLine("RET;");
}

std::string ARBDecompiler::Assign(Operation operation) {
    const Node& dest = operation[0];
    const Node& src = operation[1];

    std::string dest_name;
    if (const auto gpr = std::get_if<GprNode>(&*dest)) {
        if (gpr->GetIndex() == Register::ZeroIndex) {
            // Writing to Register::ZeroIndex is a no-op
            return {};
        }
        dest_name = fmt::format("R{}.x", gpr->GetIndex());
    } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (const Attribute::Index index = abuf->GetIndex()) {
        case Attribute::Index::Position:
            dest_name = fmt::format("result.position.{}", swizzle);
            break;
        case Attribute::Index::LayerViewportPointSize:
            switch (element) {
            case 0:
                UNIMPLEMENTED();
                return {};
            case 1:
            case 2:
                if (!device.HasNvViewportArray2()) {
                    LOG_ERROR(
                        Render_OpenGL,
                        "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
                    return {};
                }
                dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
                break;
            case 3:
                dest_name = "result.pointsize.x";
                break;
            }
            break;
        case Attribute::Index::ClipDistances0123:
            dest_name = fmt::format("result.clip[{}].x", element);
            break;
        case Attribute::Index::ClipDistances4567:
            dest_name = fmt::format("result.clip[{}].x", element + 4);
            break;
        default:
            if (!IsGenericAttribute(index)) {
                UNREACHABLE();
                return {};
            }
            dest_name =
                fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
            break;
        }
    } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
        const std::string address = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", address, address);
        dest_name = fmt::format("lmem[{}].x", address);
    } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
        AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
        ResetTemporaries();
        return {};
    } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
        AddLine("IF NE.x;");
        AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
        AddLine("ENDIF;");
        ResetTemporaries();
        return {};
    } else {
        UNREACHABLE();
        ResetTemporaries();
        return {};
    }

    AddLine("MOV.U {}, {};", dest_name, Visit(src));
    ResetTemporaries();
    return {};
}

std::string ARBDecompiler::Select(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
            Visit(operation[2]));
    return temporary;
}

std::string ARBDecompiler::FClamp(Operation operation) {
    // 1.0f in hex, to be replaced with std::bit_cast once C++20 is available
    static constexpr u32 POSITIVE_ONE = 0x3f800000;

    std::string temporary = AllocTemporary();
    const Node& value = operation[0];
    const Node& low = operation[1];
    const Node& high = operation[2];
    const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
    const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
    if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
        AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
    } else {
        AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
        AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
    }
    return temporary;
}

std::string ARBDecompiler::FCastHalf0(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::FCastHalf1(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
    AddLine("MOV {}.x, {}.y;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::FSqrt(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
    AddLine("RCP.F32 {}, {};", temporary, temporary);
    return temporary;
}
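// The instruction set targeted here exposes a reciprocal square root but apparently no
// direct scalar SQRT, so FSqrt synthesizes sqrt(x) as RCP(RSQ(x)), the usual assembly
// idiom, at the cost of a little precision versus a native square root.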

std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    if (!device.HasWarpIntrinsics()) {
        LOG_ERROR(Render_OpenGL,
                  "NV_shader_thread_shuffle is missing. Kepler or better is required.");
        AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
        return fmt::format("{}.x", temporary);
    }

    AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
    AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
    AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
    AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
    AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
    AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
    AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
    return fmt::format("{}.x", temporary);
}
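// FSwizzleAdd models the guest FSWZADD instruction: each thread adds its two inputs
// with sign factors selected by bits (threadid & 3) * 2 of the third operand. The
// selected 2-bit field indexes the FSWZA/FSWZB sign tables set up in
// InitializeVariables ({-1, 1, -1, 0} and {-1, -1, 1, -1}).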

std::string ARBDecompiler::HAdd2(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
    AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}
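// All of the H* handlers share one pattern, because packed f16x2 values travel in a
// single 32-bit scalar: UP2H unpacks the two halves into .x/.y of a vector temporary,
// the arithmetic runs with an .F16 qualifier, and PK2H repacks the pair into the .x
// component of the result.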

std::string ARBDecompiler::HMul2(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
    AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HFma2(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    const std::string tmp3 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
    AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
    AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HAbsolute(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HNegate(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
    AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
    AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
    AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HClamp(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
    AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
    AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
    AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
    AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}

std::string ARBDecompiler::HCastFloat(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
    AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HUnpack(Operation operation) {
    std::string operand = Visit(operation[0]);
    switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
    case Tegra::Shader::HalfType::H0_H1:
        return operand;
    case Tegra::Shader::HalfType::F32: {
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.U {}.x, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H0_H0: {
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H1_H1: {
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    }
    UNREACHABLE();
    return "{0, 0, 0, 0}.x";
}

std::string ARBDecompiler::HMergeF32(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HMergeH0(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
    AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HMergeH1(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
    AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::HPack2(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
    AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::LogicalAssign(Operation operation) {
    const Node& dest = operation[0];
    const Node& src = operation[1];

    std::string target;

    if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
        ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");

        const Tegra::Shader::Pred index = pred->GetIndex();
        switch (index) {
        case Tegra::Shader::Pred::NeverExecute:
        case Tegra::Shader::Pred::UnusedIndex:
            // Writing to these predicates is a no-op
            return {};
        }
        target = fmt::format("P{}.x", static_cast<u64>(index));
    } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
        target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
    } else {
        UNREACHABLE();
        ResetTemporaries();
        return {};
    }

    AddLine("MOV.U {}, {};", target, Visit(src));
    ResetTemporaries();
    return {};
}

std::string ARBDecompiler::LogicalPick2(Operation operation) {
    std::string temporary = AllocTemporary();
    const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
    AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
    return temporary;
}

std::string ARBDecompiler::LogicalAnd2(Operation operation) {
    std::string temporary = AllocTemporary();
    const std::string op = Visit(operation[0]);
    AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
    return temporary;
}

std::string ARBDecompiler::FloatOrdered(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
    AddLine("MOV.S {}, -1;", temporary);
    AddLine("MOV.S {} (NAN.x), 0;", temporary);
    AddLine("MOV.S {} (NAN.y), 0;", temporary);
    return temporary;
}

std::string ARBDecompiler::FloatUnordered(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
    AddLine("MOV.S {}, 0;", temporary);
    AddLine("MOV.S {} (NAN.x), -1;", temporary);
    AddLine("MOV.S {} (NAN.y), -1;", temporary);
    return temporary;
}

std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
    std::string temporary = AllocTemporary();
    AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
    AddLine("MOV.S {}, 0;", temporary);
    AddLine("IF CF.x;");
    AddLine("MOV.S {}, -1;", temporary);
    AddLine("ENDIF;");
    return temporary;
}
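// ADDC.U updates the carry flag as a side effect; the IF CF.x block then converts that
// flag into the 0/-1 boolean mask convention used by the rest of the decompiler.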

std::string ARBDecompiler::Texture(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [coords, temporary, swizzle] = BuildCoords(operation);

    std::string_view opcode = "TEX";
    std::string extra;
    if (meta.bias) {
        ASSERT(!meta.lod);
        opcode = "TXB";

        if (swizzle < 4) {
            AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
        } else {
            const std::string bias = AllocTemporary();
            AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
            extra = fmt::format(" {},", bias);
        }
    }
    if (meta.lod) {
        ASSERT(!meta.bias);
        opcode = "TXL";

        if (swizzle < 4) {
            AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
        } else {
            const std::string lod = AllocTemporary();
            AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
            extra = fmt::format(" {},", lod);
        }
    }

    AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}
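// TEX is upgraded to TXB (bias) or TXL (explicit LOD) on demand. When the coordinate
// register still has a free component, the extra value rides in .w; otherwise it is
// passed as a separate scalar operand through the "extra" string, the extended
// instruction form.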

std::string ARBDecompiler::TextureGather(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [coords, temporary, swizzle] = BuildCoords(operation);

    std::string comp;
    if (!meta.sampler.is_shadow) {
        const auto& immediate = std::get<ImmediateNode>(*meta.component);
        comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
    }

    AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const std::string temporary = AllocVectorTemporary();
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;

    ASSERT(!meta.sampler.is_array);

    const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
    AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureQueryLod(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const std::string temporary = AllocVectorTemporary();
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;

    ASSERT(!meta.sampler.is_array);

    const std::size_t count = operation.GetOperandsCount();
    for (std::size_t i = 0; i < count; ++i) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
    AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
    AddLine("TRUNC.S {}, {};", temporary, temporary);
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}
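// LOD.F returns floating-point LOD values, while the guest TMML instruction produces
// fixed-point results; scaling by 256 and truncating converts to what looks like an 8.8
// fixed-point encoding. Treat the exact format as an assumption inherited from the
// other shader decompilers.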

std::string ARBDecompiler::TexelFetch(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [coords, temporary, swizzle] = BuildCoords(operation);

    if (!meta.sampler.is_buffer) {
        ASSERT(swizzle < 4);
        AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
    }
    AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
            BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::TextureGradient(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const std::string ddx = AllocVectorTemporary();
    const std::string ddy = AllocVectorTemporary();
    const std::string coord = std::get<1>(BuildCoords(operation));

    const std::size_t num_components = meta.derivates.size() / 2;
    for (std::size_t index = 0; index < num_components; ++index) {
        const char swizzle = Swizzle(index);
        AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
        AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
    }

    const std::string_view result = coord;
    AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
    return fmt::format("{}.x", result);
}

std::string ARBDecompiler::ImageLoad(Operation operation) {
    const auto& meta = std::get<MetaImage>(operation.GetMeta());
    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
    const std::size_t count = operation.GetOperandsCount();
    const std::string_view type = ImageType(meta.image.type);

    const std::string temporary = AllocVectorTemporary();
    for (std::size_t i = 0; i < count; ++i) {
        AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
    AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::ImageStore(Operation operation) {
    const auto& meta = std::get<MetaImage>(operation.GetMeta());
    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
    const std::size_t num_coords = operation.GetOperandsCount();
    const std::size_t num_values = meta.values.size();
    const std::string_view type = ImageType(meta.image.type);

    const std::string coord = AllocVectorTemporary();
    const std::string value = AllocVectorTemporary();
    for (std::size_t i = 0; i < num_coords; ++i) {
        AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
    }
    for (std::size_t i = 0; i < num_values; ++i) {
        AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
    }
    AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
    return {};
}

std::string ARBDecompiler::Branch(Operation operation) {
    const auto target = std::get<ImmediateNode>(*operation[0]);
    AddLine("MOV.U PC.x, {};", target.GetValue());
    AddLine("CONT;");
    return {};
}

std::string ARBDecompiler::BranchIndirect(Operation operation) {
    AddLine("MOV.U PC.x, {};", Visit(operation[0]));
    AddLine("CONT;");
    return {};
}

std::string ARBDecompiler::PushFlowStack(Operation operation) {
    const auto stack = std::get<MetaStackClass>(operation.GetMeta());
    const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
    const std::string_view stack_name = StackName(stack);
    AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
    AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
    return {};
}

std::string ARBDecompiler::PopFlowStack(Operation operation) {
    const auto stack = std::get<MetaStackClass>(operation.GetMeta());
    const std::string_view stack_name = StackName(stack);
    AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
    AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
    AddLine("CONT;");
    return {};
}
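// PushFlowStack and PopFlowStack emulate the hardware SSY (sync) and PBK (break) stacks
// with the TEMP arrays declared in DecompileBranchMode plus one *_TOP cursor each: a
// push stores a target address and bumps the cursor, and a pop rewrites the software PC
// and CONTs back to the dispatch loop, so SYNC/BRK behave like computed branches.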

std::string ARBDecompiler::Exit(Operation) {
    Exit();
    return {};
}

std::string ARBDecompiler::Discard(Operation) {
    AddLine("KIL TR;");
    return {};
}

std::string ARBDecompiler::EmitVertex(Operation) {
    AddLine("EMIT;");
    return {};
}

std::string ARBDecompiler::EndPrimitive(Operation) {
    AddLine("ENDPRIM;");
    return {};
}

std::string ARBDecompiler::InvocationId(Operation) {
    return "primitive.invocation";
}

std::string ARBDecompiler::YNegate(Operation) {
    LOG_WARNING(Render_OpenGL, "(STUBBED)");
    std::string temporary = AllocTemporary();
    AddLine("MOV.F {}, 1;", temporary);
    return temporary;
}

std::string ARBDecompiler::ThreadId(Operation) {
    return fmt::format("{}.threadid", StageInputName(stage));
}

std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
    if (!device.HasWarpIntrinsics()) {
        LOG_ERROR(Render_OpenGL,
                  "NV_shader_thread_shuffle is missing. Kepler or better is required.");
        return Visit(operation[0]);
    }
    const std::string temporary = AllocVectorTemporary();
    AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
            Visit(operation[1]));
    AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}

std::string ARBDecompiler::Barrier(Operation) {
    AddLine("BAR;");
    return {};
}

std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
    AddLine("MEMBAR.CTA;");
    return {};
}

std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
    AddLine("MEMBAR;");
    return {};
}

} // Anonymous namespace

std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                                    const VideoCommon::Shader::Registry& registry,
                                    Tegra::Engines::ShaderType stage, std::string_view identifier) {
    return ARBDecompiler(device, ir, registry, stage, identifier).Code();
}
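// A hedged usage sketch, not part of this file: the returned string is a complete
// NV_gpu_program5 program, so a caller could feed it to the classic ARB assembly entry
// points (the target is per-stage; the variable names below are assumptions about the
// caller, not yuzu code):
//
//     const std::string code =
//         DecompileAssemblyShader(device, ir, registry, stage, "identifier");
//     GLuint program = 0;
//     glGenProgramsARB(1, &program);
//     glBindProgramARB(GL_VERTEX_PROGRAM_ARB, program);
//     glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
//                        static_cast<GLsizei>(code.size()), code.data());
//     // On failure, glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, ...) locates the error.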

} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
deleted file mode 100644
index 6afc87220..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string>
-#include <string_view>
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-enum class ShaderType : u32;
-}
-
-namespace VideoCommon::Shader {
-class ShaderIR;
-class Registry;
-} // namespace VideoCommon::Shader
-
-namespace OpenGL {
-
-class Device;
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
-                                    const VideoCommon::Shader::Registry& registry,
-                                    Tegra::Engines::ShaderType stage, std::string_view identifier);
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ceb3abcb2..3551dbdcc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -54,40 +54,6 @@ namespace {
 
 constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
 
-struct TextureHandle {
-    constexpr TextureHandle(u32 data, bool via_header_index) {
-        const Tegra::Texture::TextureHandle handle{data};
-        image = handle.tic_id;
-        sampler = via_header_index ? image : handle.tsc_id.Value();
-    }
-
-    u32 image;
-    u32 sampler;
-};
-
-template <typename Engine, typename Entry>
-TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
-                             ShaderType shader_type, size_t index = 0) {
-    if constexpr (std::is_same_v<Entry, SamplerEntry>) {
-        if (entry.is_separated) {
-            const u32 buffer_1 = entry.buffer;
-            const u32 buffer_2 = entry.secondary_buffer;
-            const u32 offset_1 = entry.offset;
-            const u32 offset_2 = entry.secondary_offset;
-            const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
-            const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
-            return TextureHandle(handle_1 | handle_2, via_header_index);
-        }
-    }
-    if (entry.is_bindless) {
-        const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
-        return TextureHandle(raw, via_header_index);
-    }
-    const u32 buffer = engine.GetBoundBuffer();
-    const u64 offset = (entry.offset + index) * sizeof(u32);
-    return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
-}
-
 /// Translates hardware transform feedback indices
 /// @param location Hardware location
 /// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -119,44 +85,6 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
 void oglEnable(GLenum cap, bool state) {
     (state ? glEnable : glDisable)(cap);
 }
-
-ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
-    if (entry.is_buffer) {
-        return ImageViewType::Buffer;
-    }
-    switch (entry.type) {
-    case Tegra::Shader::TextureType::Texture1D:
-        return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
-    case Tegra::Shader::TextureType::Texture2D:
-        return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
-    case Tegra::Shader::TextureType::Texture3D:
-        return ImageViewType::e3D;
-    case Tegra::Shader::TextureType::TextureCube:
-        return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
-    }
-    UNREACHABLE();
-    return ImageViewType::e2D;
-}
-
-ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
-    switch (entry.type) {
-    case Tegra::Shader::ImageType::Texture1D:
-        return ImageViewType::e1D;
-    case Tegra::Shader::ImageType::Texture1DArray:
-        return ImageViewType::e1DArray;
-    case Tegra::Shader::ImageType::Texture2D:
-        return ImageViewType::e2D;
-    case Tegra::Shader::ImageType::Texture2DArray:
-        return ImageViewType::e2DArray;
-    case Tegra::Shader::ImageType::Texture3D:
-        return ImageViewType::e3D;
-    case Tegra::Shader::ImageType::TextureBuffer:
-        return ImageViewType::Buffer;
-    }
-    UNREACHABLE();
-    return ImageViewType::e2D;
-}
-
 } // Anonymous namespace
 
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
     buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
173 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), 101 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
174 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), 102 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
175 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 103 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
176 async_shaders(emu_window_) {
177 if (device.UseAsynchronousShaders()) {
178 async_shaders.AllocateWorkers();
179 }
180}
181 104
182RasterizerOpenGL::~RasterizerOpenGL() = default; 105RasterizerOpenGL::~RasterizerOpenGL() = default;
183 106
@@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() {
244 } 167 }
245} 168}
246 169
247void RasterizerOpenGL::SetupShaders(bool is_indexed) {
248 u32 clip_distances = 0;
249
250 std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
251 image_view_indices.clear();
252 sampler_handles.clear();
253
254 texture_cache.SynchronizeGraphicsDescriptors();
255
256 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
257 const auto& shader_config = maxwell3d.regs.shader_config[index];
258 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
259
260 // Skip stages that are not enabled
261 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
262 switch (program) {
263 case Maxwell::ShaderProgram::Geometry:
264 program_manager.UseGeometryShader(0);
265 break;
266 case Maxwell::ShaderProgram::Fragment:
267 program_manager.UseFragmentShader(0);
268 break;
269 default:
270 break;
271 }
272 continue;
273 }
274        // Currently these stages are not supported in the OpenGL backend.
275        // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
276 if (program == Maxwell::ShaderProgram::TesselationControl ||
277 program == Maxwell::ShaderProgram::TesselationEval) {
278 continue;
279 }
280
281 Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
282 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
283 switch (program) {
284 case Maxwell::ShaderProgram::VertexA:
285 case Maxwell::ShaderProgram::VertexB:
286 program_manager.UseVertexShader(program_handle);
287 break;
288 case Maxwell::ShaderProgram::Geometry:
289 program_manager.UseGeometryShader(program_handle);
290 break;
291 case Maxwell::ShaderProgram::Fragment:
292 program_manager.UseFragmentShader(program_handle);
293 break;
294 default:
295 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
296 shader_config.enable.Value(), shader_config.offset);
297 break;
298 }
299
300 // Stage indices are 0 - 5
301 const size_t stage = index == 0 ? 0 : index - 1;
302 shaders[stage] = shader;
303
304 SetupDrawTextures(shader, stage);
305 SetupDrawImages(shader, stage);
306
307 buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
308
309 buffer_cache.UnbindGraphicsStorageBuffers(stage);
310 u32 ssbo_index = 0;
311 for (const auto& buffer : shader->GetEntries().global_memory_entries) {
312 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
313 buffer.cbuf_offset, buffer.is_written);
314 ++ssbo_index;
315 }
316
317 // Workaround for Intel drivers.
318 // When a clip distance is enabled but not set in the shader it crops parts of the screen
319 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
320        // clip distances only when they are written by a shader stage.
321 clip_distances |= shader->GetEntries().clip_distances;
322
323 // When VertexA is enabled, we have dual vertex shaders
324 if (program == Maxwell::ShaderProgram::VertexA) {
325 // VertexB was combined with VertexA, so we skip the VertexB iteration
326 ++index;
327 }
328 }
329 SyncClipEnabled(clip_distances);
330 maxwell3d.dirty.flags[Dirty::Shaders] = false;
331
332 buffer_cache.UpdateGraphicsBuffers(is_indexed);
333
334 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
335 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
336
337 buffer_cache.BindHostGeometryBuffers(is_indexed);
338
339 size_t image_view_index = 0;
340 size_t texture_index = 0;
341 size_t image_index = 0;
342 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
343 const Shader* const shader = shaders[stage];
344 if (!shader) {
345 continue;
346 }
347 buffer_cache.BindHostStageBuffers(stage);
348 const auto& base = device.GetBaseBindings(stage);
349 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
350 texture_index, image_index);
351 }
352}
353
354void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, 170void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
355 const VideoCore::DiskResourceLoadCallback& callback) { 171 const VideoCore::DiskResourceLoadCallback& callback) {}
356 shader_cache.LoadDiskCache(title_id, stop_loading, callback);
357}
358 172
359void RasterizerOpenGL::Clear() { 173void RasterizerOpenGL::Clear() {
360 MICROPROFILE_SCOPE(OpenGL_Clears); 174 MICROPROFILE_SCOPE(OpenGL_Clears);
@@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
434 248
435 // Setup shaders and their used resources. 249 // Setup shaders and their used resources.
436 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; 250 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
437 SetupShaders(is_indexed);
438 251
439 texture_cache.UpdateRenderTargets(false); 252 texture_cache.UpdateRenderTargets(false);
440 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); 253 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
@@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
488 gpu.TickWork(); 301 gpu.TickWork();
489} 302}
490 303
491void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 304void RasterizerOpenGL::DispatchCompute() {
492 Shader* const kernel = shader_cache.GetComputeKernel(code_addr); 305 UNREACHABLE_MSG("Not implemented");
493
494 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
495 BindComputeTextures(kernel);
496
497 const auto& entries = kernel->GetEntries();
498 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
499 buffer_cache.UnbindComputeStorageBuffers();
500 u32 ssbo_index = 0;
501 for (const auto& buffer : entries.global_memory_entries) {
502 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
503 buffer.is_written);
504 ++ssbo_index;
505 }
506 buffer_cache.UpdateComputeBuffers();
507 buffer_cache.BindHostComputeBuffers();
508
509 const auto& launch_desc = kepler_compute.launch_description;
510 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
511 ++num_queued_commands;
512} 306}
513 307
514void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { 308void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
726 return true; 520 return true;
727} 521}
728 522
729void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
730 image_view_indices.clear();
731 sampler_handles.clear();
732
733 texture_cache.SynchronizeComputeDescriptors();
734
735 SetupComputeTextures(kernel);
736 SetupComputeImages(kernel);
737
738 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
739 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
740
741 program_manager.BindCompute(kernel->GetHandle());
742 size_t image_view_index = 0;
743 size_t texture_index = 0;
744 size_t image_index = 0;
745 BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
746}
747
748void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
749 GLuint base_image, size_t& image_view_index,
750 size_t& texture_index, size_t& image_index) {
751 const GLuint* const samplers = sampler_handles.data() + texture_index;
752 const GLuint* const textures = texture_handles.data() + texture_index;
753 const GLuint* const images = image_handles.data() + image_index;
754
755 const size_t num_samplers = entries.samplers.size();
756 for (const auto& sampler : entries.samplers) {
757 for (size_t i = 0; i < sampler.size; ++i) {
758 const ImageViewId image_view_id = image_view_ids[image_view_index++];
759 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
760 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
761 texture_handles[texture_index++] = handle;
762 }
763 }
764 const size_t num_images = entries.images.size();
765 for (size_t unit = 0; unit < num_images; ++unit) {
766 // TODO: Mark as modified
767 const ImageViewId image_view_id = image_view_ids[image_view_index++];
768 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
769 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
770 image_handles[image_index] = handle;
771 ++image_index;
772 }
773 if (num_samplers > 0) {
774 glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
775 glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
776 }
777 if (num_images > 0) {
778 glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
779 }
780}
781
782void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
783 const bool via_header_index =
784 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
785 for (const auto& entry : shader->GetEntries().samplers) {
786 const auto shader_type = static_cast<ShaderType>(stage_index);
787 for (size_t index = 0; index < entry.size; ++index) {
788 const auto handle =
789 GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
790 const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
791 sampler_handles.push_back(sampler->Handle());
792 image_view_indices.push_back(handle.image);
793 }
794 }
795}
796
797void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
798 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
799 for (const auto& entry : kernel->GetEntries().samplers) {
800 for (size_t i = 0; i < entry.size; ++i) {
801 const auto handle =
802 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
803 const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
804 sampler_handles.push_back(sampler->Handle());
805 image_view_indices.push_back(handle.image);
806 }
807 }
808}
809
810void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
811 const bool via_header_index =
812 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
813 for (const auto& entry : shader->GetEntries().images) {
814 const auto shader_type = static_cast<ShaderType>(stage_index);
815 const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
816 image_view_indices.push_back(handle.image);
817 }
818}
819
820void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
821 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
822 for (const auto& entry : shader->GetEntries().images) {
823 const auto handle =
824 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
825 image_view_indices.push_back(handle.image);
826 }
827}
828
829void RasterizerOpenGL::SyncState() { 523void RasterizerOpenGL::SyncState() {
830 SyncViewport(); 524 SyncViewport();
831 SyncRasterizeEnable(); 525 SyncRasterizeEnable();
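
Note: the removed TextureHandle helper splits one 32-bit guest handle into a TIC (image descriptor) index and a TSC (sampler descriptor) index, and reuses the TIC index as the sampler index when sampling goes via the header index. A standalone sketch of that unpacking, assuming the conventional 20-bit TIC / 12-bit TSC split (the authoritative field widths live in Tegra::Texture::TextureHandle):

    #include <cstdint>

    struct DecodedTextureHandle {
        std::uint32_t image;   // TIC index
        std::uint32_t sampler; // TSC index
    };

    // Assumed layout: bits [0, 20) hold the TIC id, bits [20, 32) the TSC id.
    constexpr DecodedTextureHandle DecodeTextureHandle(std::uint32_t raw,
                                                       bool via_header_index) {
        const std::uint32_t tic_id = raw & 0xfffffu;
        const std::uint32_t tsc_id = raw >> 20;
        // Via header index, the sampler shares the image's descriptor slot.
        return {tic_id, via_header_index ? tic_id : tsc_id};
    }

    static_assert(DecodeTextureHandle(0x00100002u, false).sampler == 1);
    static_assert(DecodeTextureHandle(0x00100002u, true).sampler == 2);
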
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d30ad698f..1f58f8791 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -28,11 +28,9 @@
28#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
29#include "video_core/renderer_opengl/gl_resource_manager.h" 29#include "video_core/renderer_opengl/gl_resource_manager.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 31#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state_tracker.h" 32#include "video_core/renderer_opengl/gl_state_tracker.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 33#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/shader/async_shaders.h"
36#include "video_core/textures/texture.h" 34#include "video_core/textures/texture.h"
37 35
38namespace Core::Memory { 36namespace Core::Memory {
@@ -81,7 +79,7 @@ public:
81 79
82 void Draw(bool is_indexed, bool is_instanced) override; 80 void Draw(bool is_indexed, bool is_instanced) override;
83 void Clear() override; 81 void Clear() override;
84 void DispatchCompute(GPUVAddr code_addr) override; 82 void DispatchCompute() override;
85 void ResetCounter(VideoCore::QueryType type) override; 83 void ResetCounter(VideoCore::QueryType type) override;
86 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 84 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
87 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 85 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -118,36 +116,11 @@ public:
118 return num_queued_commands > 0; 116 return num_queued_commands > 0;
119 } 117 }
120 118
121 VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
122 return async_shaders;
123 }
124
125 const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
126 return async_shaders;
127 }
128
129private: 119private:
130 static constexpr size_t MAX_TEXTURES = 192; 120 static constexpr size_t MAX_TEXTURES = 192;
131 static constexpr size_t MAX_IMAGES = 48; 121 static constexpr size_t MAX_IMAGES = 48;
132 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; 122 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
133 123
134 void BindComputeTextures(Shader* kernel);
135
136 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
137 size_t& image_view_index, size_t& texture_index, size_t& image_index);
138
139 /// Configures the current textures to use for the draw command.
140 void SetupDrawTextures(const Shader* shader, size_t stage_index);
141
142 /// Configures the textures used in a compute shader.
143 void SetupComputeTextures(const Shader* kernel);
144
145 /// Configures images in a graphics shader.
146 void SetupDrawImages(const Shader* shader, size_t stage_index);
147
148 /// Configures images in a compute shader.
149 void SetupComputeImages(const Shader* shader);
150
151 /// Syncs state to match guest's 124 /// Syncs state to match guest's
152 void SyncState(); 125 void SyncState();
153 126
@@ -230,8 +203,6 @@ private:
230 /// End a transform feedback 203 /// End a transform feedback
231 void EndTransformFeedback(); 204 void EndTransformFeedback();
232 205
233 void SetupShaders(bool is_indexed);
234
235 Tegra::GPU& gpu; 206 Tegra::GPU& gpu;
236 Tegra::Engines::Maxwell3D& maxwell3d; 207 Tegra::Engines::Maxwell3D& maxwell3d;
237 Tegra::Engines::KeplerCompute& kepler_compute; 208 Tegra::Engines::KeplerCompute& kepler_compute;
@@ -251,8 +222,6 @@ private:
251 AccelerateDMA accelerate_dma; 222 AccelerateDMA accelerate_dma;
252 FenceManagerOpenGL fence_manager; 223 FenceManagerOpenGL fence_manager;
253 224
254 VideoCommon::Shader::AsyncShaders async_shaders;
255
256 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; 225 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
257 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; 226 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
258 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; 227 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
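
Note: sampler_handles, texture_handles and image_handles are kept as flat, contiguous arrays because the removed BindTextures path used ARB_multi_bind, which binds an entire range of units in one call. A reduced sketch of that pattern (glad is assumed as the GL loader):

    #include <glad/glad.h>

    // Bind `count` consecutive texture units starting at `first` with one
    // call per object type, instead of a per-unit glBindTexture loop.
    void BindContiguous(GLuint first, GLsizei count, const GLuint* textures,
                        const GLuint* samplers) {
        if (count <= 0) {
            return;
        }
        glBindTextures(first, count, textures);
        glBindSamplers(first, count, samplers);
    }
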
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5a01c59ec..4dd166156 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,307 +20,19 @@
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/shader_type.h" 21#include "video_core/engines/shader_type.h"
22#include "video_core/memory_manager.h" 22#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_arb_decompiler.h"
24#include "video_core/renderer_opengl/gl_rasterizer.h" 23#include "video_core/renderer_opengl/gl_rasterizer.h"
25#include "video_core/renderer_opengl/gl_resource_manager.h" 24#include "video_core/renderer_opengl/gl_resource_manager.h"
26#include "video_core/renderer_opengl/gl_shader_cache.h" 25#include "video_core/renderer_opengl/gl_shader_cache.h"
27#include "video_core/renderer_opengl/gl_shader_decompiler.h"
28#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
29#include "video_core/renderer_opengl/gl_state_tracker.h" 26#include "video_core/renderer_opengl/gl_state_tracker.h"
30#include "video_core/shader/memory_util.h"
31#include "video_core/shader/registry.h"
32#include "video_core/shader/shader_ir.h"
33#include "video_core/shader_cache.h" 27#include "video_core/shader_cache.h"
34#include "video_core/shader_notify.h" 28#include "video_core/shader_notify.h"
35 29
36namespace OpenGL { 30namespace OpenGL {
37 31
38using Tegra::Engines::ShaderType; 32Shader::Shader() = default;
39using VideoCommon::Shader::GetShaderAddress;
40using VideoCommon::Shader::GetShaderCode;
41using VideoCommon::Shader::GetUniqueIdentifier;
42using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
43using VideoCommon::Shader::ProgramCode;
44using VideoCommon::Shader::Registry;
45using VideoCommon::Shader::ShaderIR;
46using VideoCommon::Shader::STAGE_MAIN_OFFSET;
47
48namespace {
49
50constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
51
52/// Gets the shader type from a Maxwell program type
53constexpr GLenum GetGLShaderType(ShaderType shader_type) {
54 switch (shader_type) {
55 case ShaderType::Vertex:
56 return GL_VERTEX_SHADER;
57 case ShaderType::Geometry:
58 return GL_GEOMETRY_SHADER;
59 case ShaderType::Fragment:
60 return GL_FRAGMENT_SHADER;
61 case ShaderType::Compute:
62 return GL_COMPUTE_SHADER;
63 default:
64 return GL_NONE;
65 }
66}
67
68constexpr const char* GetShaderTypeName(ShaderType shader_type) {
69 switch (shader_type) {
70 case ShaderType::Vertex:
71 return "VS";
72 case ShaderType::TesselationControl:
73 return "HS";
74 case ShaderType::TesselationEval:
75 return "DS";
76 case ShaderType::Geometry:
77 return "GS";
78 case ShaderType::Fragment:
79 return "FS";
80 case ShaderType::Compute:
81 return "CS";
82 }
83 return "UNK";
84}
85
86constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
87 switch (program_type) {
88 case Maxwell::ShaderProgram::VertexA:
89 case Maxwell::ShaderProgram::VertexB:
90 return ShaderType::Vertex;
91 case Maxwell::ShaderProgram::TesselationControl:
92 return ShaderType::TesselationControl;
93 case Maxwell::ShaderProgram::TesselationEval:
94 return ShaderType::TesselationEval;
95 case Maxwell::ShaderProgram::Geometry:
96 return ShaderType::Geometry;
97 case Maxwell::ShaderProgram::Fragment:
98 return ShaderType::Fragment;
99 }
100 return {};
101}
102
103constexpr GLenum AssemblyEnum(ShaderType shader_type) {
104 switch (shader_type) {
105 case ShaderType::Vertex:
106 return GL_VERTEX_PROGRAM_NV;
107 case ShaderType::TesselationControl:
108 return GL_TESS_CONTROL_PROGRAM_NV;
109 case ShaderType::TesselationEval:
110 return GL_TESS_EVALUATION_PROGRAM_NV;
111 case ShaderType::Geometry:
112 return GL_GEOMETRY_PROGRAM_NV;
113 case ShaderType::Fragment:
114 return GL_FRAGMENT_PROGRAM_NV;
115 case ShaderType::Compute:
116 return GL_COMPUTE_PROGRAM_NV;
117 }
118 return {};
119}
120
121std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
122 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
123}
124
125std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
126 const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
127 const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
128 entry.graphics_info, entry.compute_info};
129 auto registry = std::make_shared<Registry>(entry.type, info);
130 for (const auto& [address, value] : entry.keys) {
131 const auto [buffer, offset] = address;
132 registry->InsertKey(buffer, offset, value);
133 }
134 for (const auto& [offset, sampler] : entry.bound_samplers) {
135 registry->InsertBoundSampler(offset, sampler);
136 }
137 for (const auto& [key, sampler] : entry.bindless_samplers) {
138 const auto [buffer, offset] = key;
139 registry->InsertBindlessSampler(buffer, offset, sampler);
140 }
141 return registry;
142}
143
144std::unordered_set<GLenum> GetSupportedFormats() {
145 GLint num_formats;
146 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
147
148 std::vector<GLint> formats(num_formats);
149 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
150
151 std::unordered_set<GLenum> supported_formats;
152 for (const GLint format : formats) {
153 supported_formats.insert(static_cast<GLenum>(format));
154 }
155 return supported_formats;
156}
157
158} // Anonymous namespace
159
160ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
161 const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
162 if (device.UseDriverCache()) {
163 // Ignore hint retrievable if we are using the driver cache
164 hint_retrievable = false;
165 }
166 const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
167 LOG_INFO(Render_OpenGL, "{}", shader_id);
168
169 auto program = std::make_shared<ProgramHandle>();
170
171 if (device.UseAssemblyShaders()) {
172 const std::string arb =
173 DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
174
175 GLuint& arb_prog = program->assembly_program.handle;
176
177// The commented-out functions signal OpenGL errors but are compatible with apitrace.
178// Use them only to capture and replay on apitrace.
179#if 0
180 glGenProgramsNV(1, &arb_prog);
181 glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
182 reinterpret_cast<const GLubyte*>(arb.data()));
183#else
184 glGenProgramsARB(1, &arb_prog);
185 glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
186 static_cast<GLsizei>(arb.size()), arb.data());
187#endif
188 const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
189 if (err && *err) {
190 LOG_CRITICAL(Render_OpenGL, "{}", err);
191 LOG_INFO(Render_OpenGL, "\n{}", arb);
192 }
193 } else {
194 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
195 OGLShader shader;
196 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
197
198 program->source_program.Create(true, hint_retrievable, shader.handle);
199 }
200
201 return program;
202}
203
204Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
205 ProgramSharedPtr program_, bool is_built_)
206 : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
207 is_built{is_built_} {
208 handle = program->assembly_program.handle;
209 if (handle == 0) {
210 handle = program->source_program.handle;
211 }
212 if (is_built) {
213 ASSERT(handle != 0);
214 }
215}
216 33
217Shader::~Shader() = default; 34Shader::~Shader() = default;
218 35
219GLuint Shader::GetHandle() const {
220 DEBUG_ASSERT(registry->IsConsistent());
221 return handle;
222}
223
224bool Shader::IsBuilt() const {
225 return is_built;
226}
227
228void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
229 program->source_program = std::move(new_program);
230 handle = program->source_program.handle;
231 is_built = true;
232}
233
234void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
235 program->assembly_program = std::move(new_program);
236 handle = program->assembly_program.handle;
237 is_built = true;
238}
239
240std::unique_ptr<Shader> Shader::CreateStageFromMemory(
241 const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
242 ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
243 const auto shader_type = GetShaderType(program_type);
244
245 auto& gpu = params.gpu;
246 gpu.ShaderNotify().MarkSharderBuilding();
247
248 auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
249 if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
250 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
251 // TODO(Rodrigo): Handle VertexA shaders
252 // std::optional<ShaderIR> ir_b;
253 // if (!code_b.empty()) {
254 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
255 // }
256 auto program =
257 BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
258 ShaderDiskCacheEntry entry;
259 entry.type = shader_type;
260 entry.code = std::move(code);
261 entry.code_b = std::move(code_b);
262 entry.unique_identifier = params.unique_identifier;
263 entry.bound_buffer = registry->GetBoundBuffer();
264 entry.graphics_info = registry->GetGraphicsInfo();
265 entry.keys = registry->GetKeys();
266 entry.bound_samplers = registry->GetBoundSamplers();
267 entry.bindless_samplers = registry->GetBindlessSamplers();
268 params.disk_cache.SaveEntry(std::move(entry));
269
270 gpu.ShaderNotify().MarkShaderComplete();
271
272 return std::unique_ptr<Shader>(new Shader(std::move(registry),
273 MakeEntries(params.device, ir, shader_type),
274 std::move(program), true));
275 } else {
276 // Required for entries
277 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
278 auto entries = MakeEntries(params.device, ir, shader_type);
279
280 async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
281 std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
282 COMPILER_SETTINGS, *registry, cpu_addr);
283
284 auto program = std::make_shared<ProgramHandle>();
285 return std::unique_ptr<Shader>(
286 new Shader(std::move(registry), std::move(entries), std::move(program), false));
287 }
288}
289
290std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
291 ProgramCode code) {
292 auto& gpu = params.gpu;
293 gpu.ShaderNotify().MarkSharderBuilding();
294
295 auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
296 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
297 const u64 uid = params.unique_identifier;
298 auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
299
300 ShaderDiskCacheEntry entry;
301 entry.type = ShaderType::Compute;
302 entry.code = std::move(code);
303 entry.unique_identifier = uid;
304 entry.bound_buffer = registry->GetBoundBuffer();
305 entry.compute_info = registry->GetComputeInfo();
306 entry.keys = registry->GetKeys();
307 entry.bound_samplers = registry->GetBoundSamplers();
308 entry.bindless_samplers = registry->GetBindlessSamplers();
309 params.disk_cache.SaveEntry(std::move(entry));
310
311 gpu.ShaderNotify().MarkShaderComplete();
312
313 return std::unique_ptr<Shader>(new Shader(std::move(registry),
314 MakeEntries(params.device, ir, ShaderType::Compute),
315 std::move(program)));
316}
317
318std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
319 const PrecompiledShader& precompiled_shader) {
320 return std::unique_ptr<Shader>(new Shader(
321 precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
322}
323
324ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, 36ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
325 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 37 Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
326 Tegra::Engines::Maxwell3D& maxwell3d_, 38 Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
331 43
332ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; 44ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
333 45
334void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading,
335 const VideoCore::DiskResourceLoadCallback& callback) {
336 disk_cache.BindTitleID(title_id);
337 const std::optional transferable = disk_cache.LoadTransferable();
338
339 LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
340 transferable.has_value() ? transferable->size() : 0);
341
342 if (!transferable) {
343 return;
344 }
345
346 std::vector<ShaderDiskCachePrecompiled> gl_cache;
347 if (!device.UseAssemblyShaders() && !device.UseDriverCache()) {
348 // Only load precompiled cache when we are not using assembly shaders
349 gl_cache = disk_cache.LoadPrecompiled();
350 }
351 const auto supported_formats = GetSupportedFormats();
352
353 // Track if precompiled cache was altered during loading to know if we have to
354 // serialize the virtual precompiled cache file back to the hard drive
355 bool precompiled_cache_altered = false;
356
357 // Inform the frontend about shader build initialization
358 if (callback) {
359 callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
360 }
361
362 std::mutex mutex;
363    std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
364 std::atomic_bool gl_cache_failed = false;
365
366 const auto find_precompiled = [&gl_cache](u64 id) {
367 return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
368 };
369
370 const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
371 std::size_t end) {
372 const auto scope = context->Acquire();
373
374 for (std::size_t i = begin; i < end; ++i) {
375 if (stop_loading.stop_requested()) {
376 return;
377 }
378 const auto& entry = (*transferable)[i];
379 const u64 uid = entry.unique_identifier;
380 const auto it = find_precompiled(uid);
381 const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
382
383 const bool is_compute = entry.type == ShaderType::Compute;
384 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
385 auto registry = MakeRegistry(entry);
386 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
387
388 ProgramSharedPtr program;
389 if (precompiled_entry) {
390                // If the shader is precompiled, attempt to load it with the cached program binary
391 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
392 if (!program) {
393 gl_cache_failed = true;
394 }
395 }
396 if (!program) {
397 // Otherwise compile it from GLSL
398 program = BuildShader(device, entry.type, uid, ir, *registry, true);
399 }
400
401 PrecompiledShader shader;
402 shader.program = std::move(program);
403 shader.registry = std::move(registry);
404 shader.entries = MakeEntries(device, ir, entry.type);
405
406 std::scoped_lock lock{mutex};
407 if (callback) {
408 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
409 transferable->size());
410 }
411 runtime_cache.emplace(entry.unique_identifier, std::move(shader));
412 }
413 };
414
415 const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
416 const std::size_t bucket_size{transferable->size() / num_workers};
417 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
418 std::vector<std::thread> threads(num_workers);
419 for (std::size_t i = 0; i < num_workers; ++i) {
420 const bool is_last_worker = i + 1 == num_workers;
421 const std::size_t start{bucket_size * i};
422 const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
423
424 // On some platforms the shared context has to be created from the GUI thread
425 contexts[i] = emu_window.CreateSharedContext();
426 threads[i] = std::thread(worker, contexts[i].get(), start, end);
427 }
428 for (auto& thread : threads) {
429 thread.join();
430 }
431
432 if (gl_cache_failed) {
433        // Invalidate the precompiled cache if a dumped shader was rejected
434 disk_cache.InvalidatePrecompiled();
435 precompiled_cache_altered = true;
436 return;
437 }
438 if (stop_loading.stop_requested()) {
439 return;
440 }
441
442 if (device.UseAssemblyShaders() || device.UseDriverCache()) {
443 // Don't store precompiled binaries for assembly shaders or when using the driver cache
444 return;
445 }
446
447 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
448 // before precompiling them
449
450 for (std::size_t i = 0; i < transferable->size(); ++i) {
451 const u64 id = (*transferable)[i].unique_identifier;
452 const auto it = find_precompiled(id);
453 if (it == gl_cache.end()) {
454 const GLuint program = runtime_cache.at(id).program->source_program.handle;
455 disk_cache.SavePrecompiled(id, program);
456 precompiled_cache_altered = true;
457 }
458 }
459
460 if (precompiled_cache_altered) {
461 disk_cache.SaveVirtualPrecompiledFile();
462 }
463}
464
465ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
466 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
467 const std::unordered_set<GLenum>& supported_formats) {
468 if (!supported_formats.contains(precompiled_entry.binary_format)) {
469 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
470 return {};
471 }
472
473 auto program = std::make_shared<ProgramHandle>();
474 GLuint& handle = program->source_program.handle;
475 handle = glCreateProgram();
476 glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
477 glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
478 static_cast<GLsizei>(precompiled_entry.binary.size()));
479
480 GLint link_status;
481 glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
482 if (link_status == GL_FALSE) {
483 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
484 return {};
485 }
486
487 return program;
488}
489
490Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
491 VideoCommon::Shader::AsyncShaders& async_shaders) {
492 if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
493 auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
494 if (last_shader->IsBuilt()) {
495 return last_shader;
496 }
497 }
498
499 const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
500
501 if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
502 auto completed_work = async_shaders.GetCompletedWork();
503 for (auto& work : completed_work) {
504 Shader* shader = TryGet(work.cpu_address);
505 gpu.ShaderNotify().MarkShaderComplete();
506 if (shader == nullptr) {
507 continue;
508 }
509 using namespace VideoCommon::Shader;
510 if (work.backend == AsyncShaders::Backend::OpenGL) {
511 shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
512 } else if (work.backend == AsyncShaders::Backend::GLASM) {
513 shader->AsyncGLASMBuilt(std::move(work.program.glasm));
514 }
515
516 auto& registry = shader->GetRegistry();
517
518 ShaderDiskCacheEntry entry;
519 entry.type = work.shader_type;
520 entry.code = std::move(work.code);
521 entry.code_b = std::move(work.code_b);
522 entry.unique_identifier = work.uid;
523 entry.bound_buffer = registry.GetBoundBuffer();
524 entry.graphics_info = registry.GetGraphicsInfo();
525 entry.keys = registry.GetKeys();
526 entry.bound_samplers = registry.GetBoundSamplers();
527 entry.bindless_samplers = registry.GetBindlessSamplers();
528 disk_cache.SaveEntry(std::move(entry));
529 }
530 }
531
532 // Look up shader in the cache based on address
533 const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
534 if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
535 return last_shaders[static_cast<std::size_t>(program)] = shader;
536 }
537
538 const u8* const host_ptr{gpu_memory.GetPointer(address)};
539
540 // No shader found - create a new one
541 ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
542 ProgramCode code_b;
543 if (program == Maxwell::ShaderProgram::VertexA) {
544 const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
545 const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
546 code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
547 }
548 const std::size_t code_size = code.size() * sizeof(u64);
549
550 const u64 unique_identifier = GetUniqueIdentifier(
551 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
552
553 const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
554 *cpu_addr, host_ptr, unique_identifier};
555
556 std::unique_ptr<Shader> shader;
557 const auto found = runtime_cache.find(unique_identifier);
558 if (found == runtime_cache.end()) {
559 shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
560 async_shaders, cpu_addr.value_or(0));
561 } else {
562 shader = Shader::CreateFromCache(params, found->second);
563 }
564
565 Shader* const result = shader.get();
566 if (cpu_addr) {
567 Register(std::move(shader), *cpu_addr, code_size);
568 } else {
569 null_shader = std::move(shader);
570 }
571
572 return last_shaders[static_cast<std::size_t>(program)] = result;
573}
574
575Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
576 const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
577
578 if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
579 return kernel;
580 }
581
582 // No kernel found, create a new one
583 const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
584 ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
585 const std::size_t code_size{code.size() * sizeof(u64)};
586 const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
587
588 const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
589 *cpu_addr, host_ptr, unique_identifier};
590
591 std::unique_ptr<Shader> kernel;
592 const auto found = runtime_cache.find(unique_identifier);
593 if (found == runtime_cache.end()) {
594 kernel = Shader::CreateKernelFromMemory(params, std::move(code));
595 } else {
596 kernel = Shader::CreateFromCache(params, found->second);
597 }
598
599 Shader* const result = kernel.get();
600 if (cpu_addr) {
601 Register(std::move(kernel), *cpu_addr, code_size);
602 } else {
603 null_kernel = std::move(kernel);
604 }
605 return result;
606}
607
608} // namespace OpenGL 46} // namespace OpenGL
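
Note: the deleted LoadDiskCache compiled shaders in parallel by giving each hardware thread its own shared GL context and a contiguous bucket of the transferable list, with the remainder going to the last worker. The partitioning on its own, as a sketch (context creation and the per-shader build are elided):

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <thread>
    #include <vector>

    // Runs work(begin, end) on one thread per core; the last bucket absorbs
    // the division remainder, mirroring the removed loader.
    void RunPartitioned(std::size_t num_items,
                        const std::function<void(std::size_t, std::size_t)>& work) {
        const std::size_t num_workers = std::max(1u, std::thread::hardware_concurrency());
        const std::size_t bucket_size = num_items / num_workers;
        std::vector<std::thread> threads;
        threads.reserve(num_workers);
        for (std::size_t i = 0; i < num_workers; ++i) {
            const std::size_t begin = bucket_size * i;
            const std::size_t end =
                (i + 1 == num_workers) ? num_items : begin + bucket_size;
            threads.emplace_back(work, begin, end);
        }
        for (std::thread& thread : threads) {
            thread.join();
        }
    }
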
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b30308b6f..ad3d15a76 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -19,10 +19,6 @@
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "video_core/engines/shader_type.h" 20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
22#include "video_core/renderer_opengl/gl_shader_decompiler.h"
23#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
24#include "video_core/shader/registry.h"
25#include "video_core/shader/shader_ir.h"
26#include "video_core/shader_cache.h" 22#include "video_core/shader_cache.h"
27 23
28namespace Tegra { 24namespace Tegra {
@@ -33,10 +29,6 @@ namespace Core::Frontend {
33class EmuWindow; 29class EmuWindow;
34} 30}
35 31
36namespace VideoCommon::Shader {
37class AsyncShaders;
38}
39
40namespace OpenGL { 32namespace OpenGL {
41 33
42class Device; 34class Device;
@@ -44,77 +36,10 @@ class RasterizerOpenGL;
44 36
45using Maxwell = Tegra::Engines::Maxwell3D::Regs; 37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
46 38
47struct ProgramHandle { 39class Shader {
48 OGLProgram source_program;
49 OGLAssemblyProgram assembly_program;
50};
51using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
52
53struct PrecompiledShader {
54 ProgramSharedPtr program;
55 std::shared_ptr<VideoCommon::Shader::Registry> registry;
56 ShaderEntries entries;
57};
58
59struct ShaderParameters {
60 Tegra::GPU& gpu;
61 Tegra::Engines::ConstBufferEngineInterface& engine;
62 ShaderDiskCacheOpenGL& disk_cache;
63 const Device& device;
64 VAddr cpu_addr;
65 const u8* host_ptr;
66 u64 unique_identifier;
67};
68
69ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
70 u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
71 const VideoCommon::Shader::Registry& registry,
72 bool hint_retrievable = false);
73
74class Shader final {
75public: 40public:
41 explicit Shader();
76 ~Shader(); 42 ~Shader();
77
78 /// Gets the GL program handle for the shader
79 GLuint GetHandle() const;
80
81 bool IsBuilt() const;
82
83 /// Gets the shader entries for the shader
84 const ShaderEntries& GetEntries() const {
85 return entries;
86 }
87
88 const VideoCommon::Shader::Registry& GetRegistry() const {
89 return *registry;
90 }
91
92 /// Mark a OpenGL shader as built
93 void AsyncOpenGLBuilt(OGLProgram new_program);
94
95 /// Mark a GLASM shader as built
96 void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
97
98 static std::unique_ptr<Shader> CreateStageFromMemory(
99 const ShaderParameters& params, Maxwell::ShaderProgram program_type,
100 ProgramCode program_code, ProgramCode program_code_b,
101 VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
102
103 static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
104 ProgramCode code);
105
106 static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
107 const PrecompiledShader& precompiled_shader);
108
109private:
110 explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
111 ProgramSharedPtr program, bool is_built_ = true);
112
113 std::shared_ptr<VideoCommon::Shader::Registry> registry;
114 ShaderEntries entries;
115 ProgramSharedPtr program;
116 GLuint handle = 0;
117 bool is_built{};
118}; 43};
119 44
120class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { 45class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
@@ -126,36 +51,13 @@ public:
126 Tegra::MemoryManager& gpu_memory_, const Device& device_); 51 Tegra::MemoryManager& gpu_memory_, const Device& device_);
127 ~ShaderCacheOpenGL() override; 52 ~ShaderCacheOpenGL() override;
128 53
129 /// Loads disk cache for the current game
130 void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
131 const VideoCore::DiskResourceLoadCallback& callback);
132
133 /// Gets the current specified shader stage program
134 Shader* GetStageProgram(Maxwell::ShaderProgram program,
135 VideoCommon::Shader::AsyncShaders& async_shaders);
136
137 /// Gets a compute kernel in the passed address
138 Shader* GetComputeKernel(GPUVAddr code_addr);
139
140private: 54private:
141 ProgramSharedPtr GeneratePrecompiledProgram(
142 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
143 const std::unordered_set<GLenum>& supported_formats);
144
145 Core::Frontend::EmuWindow& emu_window; 55 Core::Frontend::EmuWindow& emu_window;
146 Tegra::GPU& gpu; 56 Tegra::GPU& gpu;
147 Tegra::MemoryManager& gpu_memory; 57 Tegra::MemoryManager& gpu_memory;
148 Tegra::Engines::Maxwell3D& maxwell3d; 58 Tegra::Engines::Maxwell3D& maxwell3d;
149 Tegra::Engines::KeplerCompute& kepler_compute; 59 Tegra::Engines::KeplerCompute& kepler_compute;
150 const Device& device; 60 const Device& device;
151
152 ShaderDiskCacheOpenGL disk_cache;
153 std::unordered_map<u64, PrecompiledShader> runtime_cache;
154
155 std::unique_ptr<Shader> null_shader;
156 std::unique_ptr<Shader> null_kernel;
157
158 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
159}; 61};
160 62
161} // namespace OpenGL 63} // namespace OpenGL
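
Note: for context on the precompiled path this header used to expose: a stored GL program binary is only loadable while the driver still advertises its format under GL_PROGRAM_BINARY_FORMATS and still accepts the blob, which is why the removed loader checked the format set and the link status before trusting an entry. A minimal save/load sketch (glad assumed as the loader):

    #include <cstddef>
    #include <vector>

    #include <glad/glad.h>

    struct ProgramBinary {
        GLenum format = GL_NONE;
        std::vector<GLbyte> data;
    };

    ProgramBinary SaveBinary(GLuint program) {
        GLint length = 0;
        glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &length);
        ProgramBinary binary;
        binary.data.resize(static_cast<std::size_t>(length));
        glGetProgramBinary(program, length, nullptr, &binary.format, binary.data.data());
        return binary;
    }

    // Returns 0 when the driver rejects the blob (e.g. after a driver update).
    GLuint LoadBinary(const ProgramBinary& binary) {
        const GLuint program = glCreateProgram();
        glProgramParameteri(program, GL_PROGRAM_SEPARABLE, GL_TRUE);
        glProgramBinary(program, binary.format, binary.data.data(),
                        static_cast<GLsizei>(binary.data.size()));
        GLint link_status = GL_FALSE;
        glGetProgramiv(program, GL_LINK_STATUS, &link_status);
        if (link_status != GL_TRUE) {
            glDeleteProgram(program);
            return 0;
        }
        return program;
    }
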
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
deleted file mode 100644
index 9c28498e8..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ /dev/null
@@ -1,2986 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <string>
7#include <string_view>
8#include <utility>
9#include <variant>
10#include <vector>
11
12#include <fmt/format.h>
13
14#include "common/alignment.h"
15#include "common/assert.h"
16#include "common/common_types.h"
17#include "common/div_ceil.h"
18#include "common/logging/log.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_device.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/shader/ast.h"
25#include "video_core/shader/node.h"
26#include "video_core/shader/shader_ir.h"
27#include "video_core/shader/transform_feedback.h"
28
29namespace OpenGL {
30
31namespace {
32
33using Tegra::Engines::ShaderType;
34using Tegra::Shader::Attribute;
35using Tegra::Shader::Header;
36using Tegra::Shader::IpaInterpMode;
37using Tegra::Shader::IpaMode;
38using Tegra::Shader::IpaSampleMode;
39using Tegra::Shader::PixelImap;
40using Tegra::Shader::Register;
41using Tegra::Shader::TextureType;
42
43using namespace VideoCommon::Shader;
44using namespace std::string_literals;
45
46using Maxwell = Tegra::Engines::Maxwell3D::Regs;
47using Operation = const OperationNode&;
48
49class ASTDecompiler;
50class ExprDecompiler;
51
52enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
53
54constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
55
56constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
57constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
58
59struct TextureOffset {};
60struct TextureDerivates {};
61using TextureArgument = std::pair<Type, Node>;
62using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
63
64constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
65constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
66
67constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint
69#define itof intBitsToFloat
70#define utof uintBitsToFloat
71
72bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
73 bvec2 is_nan1 = isnan(pair1);
74 bvec2 is_nan2 = isnan(pair2);
75 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
76}}
77
78const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
80)";
81
82class ShaderWriter final {
83public:
84 void AddExpression(std::string_view text) {
85 DEBUG_ASSERT(scope >= 0);
86 if (!text.empty()) {
87 AppendIndentation();
88 }
89 shader_source += text;
90 }
91
92 // Forwards all arguments directly to libfmt.
93 // Note that all formatting requirements for fmt must be
94    // obeyed when using this function. (e.g. {{ must be used when
95    // printing the character '{' is desirable. Ditto for }} and '}',
96 // etc).
97 template <typename... Args>
98 void AddLine(std::string_view text, Args&&... args) {
99 AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...));
100 AddNewLine();
101 }
102
103 void AddNewLine() {
104 DEBUG_ASSERT(scope >= 0);
105 shader_source += '\n';
106 }
107
108 std::string GenerateTemporary() {
109 return fmt::format("tmp{}", temporary_index++);
110 }
111
112 std::string GetResult() {
113 return std::move(shader_source);
114 }
115
116 s32 scope = 0;
117
118private:
119 void AppendIndentation() {
120 shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
121 }
122
123 std::string shader_source;
124 u32 temporary_index = 1;
125};
126
127class Expression final {
128public:
129 Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
130 ASSERT(type != Type::Void);
131 }
132 Expression() : type{Type::Void} {}
133
134 Type GetType() const {
135 return type;
136 }
137
138 std::string GetCode() const {
139 return code;
140 }
141
142 void CheckVoid() const {
143 ASSERT(type == Type::Void);
144 }
145
146 std::string As(Type type_) const {
147 switch (type_) {
148 case Type::Bool:
149 return AsBool();
150 case Type::Bool2:
151 return AsBool2();
152 case Type::Float:
153 return AsFloat();
154 case Type::Int:
155 return AsInt();
156 case Type::Uint:
157 return AsUint();
158 case Type::HalfFloat:
159 return AsHalfFloat();
160 default:
161 UNREACHABLE_MSG("Invalid type");
162 return code;
163 }
164 }
165
166 std::string AsBool() const {
167 switch (type) {
168 case Type::Bool:
169 return code;
170 default:
171 UNREACHABLE_MSG("Incompatible types");
172 return code;
173 }
174 }
175
176 std::string AsBool2() const {
177 switch (type) {
178 case Type::Bool2:
179 return code;
180 default:
181 UNREACHABLE_MSG("Incompatible types");
182 return code;
183 }
184 }
185
186 std::string AsFloat() const {
187 switch (type) {
188 case Type::Float:
189 return code;
190 case Type::Uint:
191 return fmt::format("utof({})", code);
192 case Type::Int:
193 return fmt::format("itof({})", code);
194 case Type::HalfFloat:
195 return fmt::format("utof(packHalf2x16({}))", code);
196 default:
197 UNREACHABLE_MSG("Incompatible types");
198 return code;
199 }
200 }
201
202 std::string AsInt() const {
203 switch (type) {
204 case Type::Float:
205 return fmt::format("ftoi({})", code);
206 case Type::Uint:
207 return fmt::format("int({})", code);
208 case Type::Int:
209 return code;
210 case Type::HalfFloat:
211 return fmt::format("int(packHalf2x16({}))", code);
212 default:
213 UNREACHABLE_MSG("Incompatible types");
214 return code;
215 }
216 }
217
218 std::string AsUint() const {
219 switch (type) {
220 case Type::Float:
221 return fmt::format("ftou({})", code);
222 case Type::Uint:
223 return code;
224 case Type::Int:
225 return fmt::format("uint({})", code);
226 case Type::HalfFloat:
227 return fmt::format("packHalf2x16({})", code);
228 default:
229 UNREACHABLE_MSG("Incompatible types");
230 return code;
231 }
232 }
233
234 std::string AsHalfFloat() const {
235 switch (type) {
236 case Type::Float:
237 return fmt::format("unpackHalf2x16(ftou({}))", code);
238 case Type::Uint:
239 return fmt::format("unpackHalf2x16({})", code);
240 case Type::Int:
241 return fmt::format("unpackHalf2x16(int({}))", code);
242 case Type::HalfFloat:
243 return code;
244 default:
245 UNREACHABLE_MSG("Incompatible types");
246 return code;
247 }
248 }
249
250private:
251 std::string code;
252 Type type{};
253};
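// Note: the Expression wrapper above implements GLSL-side bit casts: a value
// carries one static Type, and crossing types goes through ftoi/ftou/itof/utof,
// which COMMON_DECLARATIONS above maps onto floatBitsToInt and friends. A
// host-side C++20 analogue of that reinterpretation, as a sketch:
//
//     #include <bit>
//     #include <cstdint>
//
//     constexpr std::uint32_t ftou(float value) {
//         return std::bit_cast<std::uint32_t>(value); // same bits, new type
//     }
//     constexpr float utof(std::uint32_t value) {
//         return std::bit_cast<float>(value);
//     }
//     static_assert(utof(ftou(1.0f)) == 1.0f); // lossless round trip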
254
255const char* GetTypeString(Type type) {
256 switch (type) {
257 case Type::Bool:
258 return "bool";
259 case Type::Bool2:
260 return "bvec2";
261 case Type::Float:
262 return "float";
263 case Type::Int:
264 return "int";
265 case Type::Uint:
266 return "uint";
267 case Type::HalfFloat:
268 return "vec2";
269 default:
270 UNREACHABLE_MSG("Invalid type");
271 return "<invalid type>";
272 }
273}
274
275const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
276 switch (image_type) {
277 case Tegra::Shader::ImageType::Texture1D:
278 return "1D";
279 case Tegra::Shader::ImageType::TextureBuffer:
280 return "Buffer";
281 case Tegra::Shader::ImageType::Texture1DArray:
282 return "1DArray";
283 case Tegra::Shader::ImageType::Texture2D:
284 return "2D";
285 case Tegra::Shader::ImageType::Texture2DArray:
286 return "2DArray";
287 case Tegra::Shader::ImageType::Texture3D:
288 return "3D";
289 default:
290 UNREACHABLE();
291 return "1D";
292 }
293}
294
295/// Describes primitive behavior on geometry shaders
296std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
297 switch (topology) {
298 case Maxwell::PrimitiveTopology::Points:
299 return {"points", 1};
300 case Maxwell::PrimitiveTopology::Lines:
301 case Maxwell::PrimitiveTopology::LineStrip:
302 return {"lines", 2};
303 case Maxwell::PrimitiveTopology::LinesAdjacency:
304 case Maxwell::PrimitiveTopology::LineStripAdjacency:
305 return {"lines_adjacency", 4};
306 case Maxwell::PrimitiveTopology::Triangles:
307 case Maxwell::PrimitiveTopology::TriangleStrip:
308 case Maxwell::PrimitiveTopology::TriangleFan:
309 return {"triangles", 3};
310 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
311 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
312 return {"triangles_adjacency", 6};
313 default:
314 UNIMPLEMENTED_MSG("topology={}", topology);
315 return {"points", 1};
316 }
317}
318
319/// Generates code to use for a swizzle operation.
320constexpr const char* GetSwizzle(std::size_t element) {
321 constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
322 return swizzle.at(element);
323}
324
325constexpr const char* GetColorSwizzle(std::size_t element) {
326 constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
327 return swizzle.at(element);
328}
329
330/// Translates an output topology to its GLSL layout name
331std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
332 switch (topology) {
333 case Tegra::Shader::OutputTopology::PointList:
334 return "points";
335 case Tegra::Shader::OutputTopology::LineStrip:
336 return "line_strip";
337 case Tegra::Shader::OutputTopology::TriangleStrip:
338 return "triangle_strip";
339 default:
340 UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
341 return "points";
342 }
343}
344
345/// Returns true if an object has to be treated as precise
346bool IsPrecise(Operation operand) {
347 const auto& meta{operand.GetMeta()};
348 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
349 return arithmetic->precise;
350 }
351 return false;
352}
353
354bool IsPrecise(const Node& node) {
355 if (const auto operation = std::get_if<OperationNode>(&*node)) {
356 return IsPrecise(*operation);
357 }
358 return false;
359}
360
361constexpr bool IsGenericAttribute(Attribute::Index index) {
362 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
363}
364
365constexpr bool IsLegacyTexCoord(Attribute::Index index) {
366 return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
367 static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
368}
369
370constexpr Attribute::Index ToGenericAttribute(u64 value) {
371 return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
372}
373
374constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
375 return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
376}
377
378u32 GetGenericAttributeIndex(Attribute::Index index) {
379 ASSERT(IsGenericAttribute(index));
380 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
381}
382
383constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
384 switch (stack) {
385 case MetaStackClass::Ssy:
386 return "ssy";
387 case MetaStackClass::Pbk:
388 return "pbk";
389 }
390 return {};
391}
392
393std::string FlowStackName(MetaStackClass stack) {
394 return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
395}
396
397std::string FlowStackTopName(MetaStackClass stack) {
398 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
399}
400
401struct GenericVaryingDescription {
402 std::string name;
403 u8 first_element = 0;
404 bool is_scalar = false;
405};
406
407class GLSLDecompiler final {
408public:
409 explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
410 ShaderType stage_, std::string_view identifier_,
411 std::string_view suffix_)
412 : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
413 identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
414 if (stage != ShaderType::Compute) {
415 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
416 }
417 }
418
419 void Decompile() {
420 DeclareHeader();
421 DeclareVertex();
422 DeclareGeometry();
423 DeclareFragment();
424 DeclareCompute();
425 DeclareInputAttributes();
426 DeclareOutputAttributes();
427 DeclareImages();
428 DeclareSamplers();
429 DeclareGlobalMemory();
430 DeclareConstantBuffers();
431 DeclareLocalMemory();
432 DeclareRegisters();
433 DeclarePredicates();
434 DeclareInternalFlags();
435 DeclareCustomVariables();
436 DeclarePhysicalAttributeReader();
437
438 code.AddLine("void main() {{");
439 ++code.scope;
440
441 if (stage == ShaderType::Vertex) {
442 code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
443 }
444
445 if (ir.IsDecompiled()) {
446 DecompileAST();
447 } else {
448 DecompileBranchMode();
449 }
450
451 --code.scope;
452 code.AddLine("}}");
453 }
454
455 std::string GetResult() {
456 return code.GetResult();
457 }
458
459private:
460 friend class ASTDecompiler;
461 friend class ExprDecompiler;
462
463 void DecompileBranchMode() {
464 // VM's program counter
465 const auto first_address = ir.GetBasicBlocks().begin()->first;
466 code.AddLine("uint jmp_to = {}U;", first_address);
467
468 // TODO(Subv): Figure out the actual depth of the flow stack; for now it seems
469 // unlikely that shaders will use 20 nested SSYs and PBKs.
470 constexpr u32 FLOW_STACK_SIZE = 20;
471 if (!ir.IsFlowStackDisabled()) {
472 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
473 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
474 code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
475 }
476 }
477
478 code.AddLine("while (true) {{");
479 ++code.scope;
480
481 code.AddLine("switch (jmp_to) {{");
482
483 for (const auto& pair : ir.GetBasicBlocks()) {
484 const auto& [address, bb] = pair;
485 code.AddLine("case 0x{:X}U: {{", address);
486 ++code.scope;
487
488 VisitBlock(bb);
489
490 --code.scope;
491 code.AddLine("}}");
492 }
493
494 code.AddLine("default: return;");
495 code.AddLine("}}");
496
497 --code.scope;
498 code.AddLine("}}");
499 }
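
// Illustrative sketch of what DecompileBranchMode emits (addresses are examples):
//   uint jmp_to = 0xA0U;
//   uint ssy_flow_stack[20]; uint ssy_flow_stack_top = 0U; // likewise for pbk
//   while (true) {
//       switch (jmp_to) {
//       case 0xA0U: { /* basic block; may assign jmp_to and break to loop again */ }
//       default: return;
//       }
//   }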
500
501 void DecompileAST();
502
503 void DeclareHeader() {
504 if (!identifier.empty()) {
505 code.AddLine("// {}", identifier);
506 }
507 const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
508 code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
509 code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
510 if (device.HasShaderBallot()) {
511 code.AddLine("#extension GL_ARB_shader_ballot : require");
512 }
513 if (device.HasVertexViewportLayer()) {
514 code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
515 }
516 if (device.HasImageLoadFormatted()) {
517 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
518 }
519 if (device.HasTextureShadowLod()) {
520 code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
521 }
522 if (device.HasWarpIntrinsics()) {
523 code.AddLine("#extension GL_NV_gpu_shader5 : require");
524 code.AddLine("#extension GL_NV_shader_thread_group : require");
525 code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
526 }
527 // This pragma stops Nvidia's driver from over-optimizing math (probably by using fp16
528 // operations) in places where we don't want it to.
529 // Thanks to Ryujinx for finding this workaround.
530 code.AddLine("#pragma optionNV(fastmath off)");
531
532 code.AddNewLine();
533
534 code.AddLine(COMMON_DECLARATIONS);
535 }
536
537 void DeclareVertex() {
538 if (stage != ShaderType::Vertex) {
539 return;
540 }
541
542 DeclareVertexRedeclarations();
543 }
544
545 void DeclareGeometry() {
546 if (stage != ShaderType::Geometry) {
547 return;
548 }
549
550 const auto& info = registry.GetGraphicsInfo();
551 const auto input_topology = info.primitive_topology;
552 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
553 max_input_vertices = max_vertices;
554 code.AddLine("layout ({}) in;", glsl_topology);
555
556 const auto topology = GetTopologyName(header.common3.output_topology);
557 const auto max_output_vertices = header.common4.max_output_vertices.Value();
558 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
559 code.AddNewLine();
560
561 code.AddLine("in gl_PerVertex {{");
562 ++code.scope;
563 code.AddLine("vec4 gl_Position;");
564 --code.scope;
565 code.AddLine("}} gl_in[];");
566
567 DeclareVertexRedeclarations();
568 }
569
570 void DeclareFragment() {
571 if (stage != ShaderType::Fragment) {
572 return;
573 }
574 if (ir.UsesLegacyVaryings()) {
575 code.AddLine("in gl_PerFragment {{");
576 ++code.scope;
577 code.AddLine("vec4 gl_TexCoord[8];");
578 code.AddLine("vec4 gl_Color;");
579 code.AddLine("vec4 gl_SecondaryColor;");
580 --code.scope;
581 code.AddLine("}};");
582 }
583
584 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
585 code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
586 }
587 }
588
589 void DeclareCompute() {
590 if (stage != ShaderType::Compute) {
591 return;
592 }
593 const auto& info = registry.GetComputeInfo();
594 if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
595 const u32 limit = device.GetMaxComputeSharedMemorySize();
596 if (size > limit) {
597 LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
598 size, limit);
599 size = limit;
600 }
601
602 code.AddLine("shared uint smem[{}];", size / 4);
603 code.AddNewLine();
604 }
605 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
606 info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
607 code.AddNewLine();
608 }
609
610 void DeclareVertexRedeclarations() {
611 code.AddLine("out gl_PerVertex {{");
612 ++code.scope;
613
614 auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
615 if (!pos_xfb.empty()) {
616 pos_xfb = fmt::format("layout ({}) ", pos_xfb);
617 }
618 const char* pos_type =
619 FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
620 code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
621
622 for (const auto attribute : ir.GetOutputAttributes()) {
623 if (attribute == Attribute::Index::ClipDistances0123 ||
624 attribute == Attribute::Index::ClipDistances4567) {
625 code.AddLine("float gl_ClipDistance[];");
626 break;
627 }
628 }
629
630 if (stage != ShaderType::Geometry &&
631 (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
632 if (ir.UsesLayer()) {
633 code.AddLine("int gl_Layer;");
634 }
635 if (ir.UsesViewportIndex()) {
636 code.AddLine("int gl_ViewportIndex;");
637 }
638 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
639 !device.HasVertexViewportLayer()) {
640 LOG_ERROR(
641 Render_OpenGL,
642 "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
643 }
644
645 if (ir.UsesPointSize()) {
646 code.AddLine("float gl_PointSize;");
647 }
648
649 if (ir.UsesLegacyVaryings()) {
650 code.AddLine("vec4 gl_TexCoord[8];");
651 code.AddLine("vec4 gl_FrontColor;");
652 code.AddLine("vec4 gl_FrontSecondaryColor;");
653 code.AddLine("vec4 gl_BackColor;");
654 code.AddLine("vec4 gl_BackSecondaryColor;");
655 }
656
657 --code.scope;
658 code.AddLine("}};");
659 code.AddNewLine();
660
661 if (stage == ShaderType::Geometry) {
662 if (ir.UsesLayer()) {
663 code.AddLine("out int gl_Layer;");
664 }
665 if (ir.UsesViewportIndex()) {
666 code.AddLine("out int gl_ViewportIndex;");
667 }
668 }
669 code.AddNewLine();
670 }
671
672 void DeclareRegisters() {
673 const auto& registers = ir.GetRegisters();
674 for (const u32 gpr : registers) {
675 code.AddLine("float {} = 0.0f;", GetRegister(gpr));
676 }
677 if (!registers.empty()) {
678 code.AddNewLine();
679 }
680 }
681
682 void DeclareCustomVariables() {
683 const u32 num_custom_variables = ir.GetNumCustomVariables();
684 for (u32 i = 0; i < num_custom_variables; ++i) {
685 code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
686 }
687 if (num_custom_variables > 0) {
688 code.AddNewLine();
689 }
690 }
691
692 void DeclarePredicates() {
693 const auto& predicates = ir.GetPredicates();
694 for (const auto pred : predicates) {
695 code.AddLine("bool {} = false;", GetPredicate(pred));
696 }
697 if (!predicates.empty()) {
698 code.AddNewLine();
699 }
700 }
701
702 void DeclareLocalMemory() {
703 u64 local_memory_size = 0;
704 if (stage == ShaderType::Compute) {
705 local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
706 } else {
707 local_memory_size = header.GetLocalMemorySize();
708 }
709 if (local_memory_size == 0) {
710 return;
711 }
712 const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
713 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
714 code.AddNewLine();
715 }
716
717 void DeclareInternalFlags() {
718 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
719 const auto flag_code = static_cast<InternalFlag>(flag);
720 code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
721 }
722 code.AddNewLine();
723 }
724
725 const char* GetInputFlags(PixelImap attribute) {
726 switch (attribute) {
727 case PixelImap::Perspective:
728 return "smooth";
729 case PixelImap::Constant:
730 return "flat";
731 case PixelImap::ScreenLinear:
732 return "noperspective";
733 case PixelImap::Unused:
734 break;
735 }
736 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
737 return {};
738 }
739
740 void DeclareInputAttributes() {
741 if (ir.HasPhysicalAttributes()) {
742 const u32 num_inputs{GetNumPhysicalInputAttributes()};
743 for (u32 i = 0; i < num_inputs; ++i) {
744 DeclareInputAttribute(ToGenericAttribute(i), true);
745 }
746 code.AddNewLine();
747 return;
748 }
749
750 const auto& attributes = ir.GetInputAttributes();
751 for (const auto index : attributes) {
752 if (IsGenericAttribute(index)) {
753 DeclareInputAttribute(index, false);
754 }
755 }
756 if (!attributes.empty()) {
757 code.AddNewLine();
758 }
759 }
760
761 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
762 const u32 location{GetGenericAttributeIndex(index)};
763
764 std::string name{GetGenericInputAttribute(index)};
765 if (stage == ShaderType::Geometry) {
766 name = "gs_" + name + "[]";
767 }
768
769 std::string suffix_;
770 if (stage == ShaderType::Fragment) {
771 const auto input_mode{header.ps.GetPixelImap(location)};
772 if (input_mode == PixelImap::Unused) {
773 return;
774 }
775 suffix_ = GetInputFlags(input_mode);
776 }
777
778 code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
779 }
780
781 void DeclareOutputAttributes() {
782 if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
783 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
784 DeclareOutputAttribute(ToGenericAttribute(i));
785 }
786 code.AddNewLine();
787 return;
788 }
789
790 const auto& attributes = ir.GetOutputAttributes();
791 for (const auto index : attributes) {
792 if (IsGenericAttribute(index)) {
793 DeclareOutputAttribute(index);
794 }
795 }
796 if (!attributes.empty()) {
797 code.AddNewLine();
798 }
799 }
800
801 std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
802 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
803 const auto it = transform_feedback.find(location);
804 if (it == transform_feedback.end()) {
805 return std::nullopt;
806 }
807 return it->second.components;
808 }
809
810 std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
811 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
812 const auto it = transform_feedback.find(location);
813 if (it == transform_feedback.end()) {
814 return {};
815 }
816
817 const VaryingTFB& tfb = it->second;
818 return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
819 tfb.offset, tfb.stride);
820 }
821
822 void DeclareOutputAttribute(Attribute::Index index) {
823 static constexpr std::string_view swizzle = "xyzw";
824 u8 element = 0;
825 while (element < 4) {
826 auto xfb = GetTransformFeedbackDecoration(index, element);
827 if (!xfb.empty()) {
828 xfb = fmt::format(", {}", xfb);
829 }
830 const std::size_t remainder = 4 - element;
831 const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
832 const char* const type = FLOAT_TYPES.at(num_components - 1);
833
834 const u32 location = GetGenericAttributeIndex(index);
835
836 GenericVaryingDescription description;
837 description.first_element = static_cast<u8>(element);
838 description.is_scalar = num_components == 1;
839 description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
840 if (element != 0 || num_components != 4) {
841 const std::string_view name_swizzle = swizzle.substr(element, num_components);
842 description.name = fmt::format("{}_{}", description.name, name_swizzle);
843 }
844 for (std::size_t i = 0; i < num_components; ++i) {
845 const u8 offset = static_cast<u8>(location * 4 + element + i);
846 varying_description.insert({offset, description});
847 }
848
849 code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
850 xfb, type, description.name);
851
852 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
853 }
854 }
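
// Illustrative example of an emitted output declaration (the out_attr naming is an
// assumption based on OUTPUT_ATTRIBUTE_NAME):
//   layout (location = 1, component = 2, xfb_buffer = 0, xfb_offset = 8, xfb_stride = 16) out vec2 out_attr1_zw;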
855
856 void DeclareConstantBuffers() {
857 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
858 for (const auto& [index, info] : ir.GetConstantBuffers()) {
859 const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
860 const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
861 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
862 GetConstBufferBlock(index));
863 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
864 code.AddLine("}};");
865 code.AddNewLine();
866 }
867 }
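
// Illustrative emitted block (schematic names standing in for GetConstBufferBlock and
// GetConstBuffer; an indirect buffer uses MAX_CONSTBUFFER_ELEMENTS instead):
//   layout (std140, binding = 0) uniform cbuf_block_3 { uvec4 cbuf3[64]; };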
868
869 void DeclareGlobalMemory() {
870 u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
871 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
872 // Since we don't know how the shader will use the memory, hint the driver to disable as
873 // many optimizations as possible
874 std::string qualifier = "coherent volatile";
875 if (usage.is_read && !usage.is_written) {
876 qualifier += " readonly";
877 } else if (usage.is_written && !usage.is_read) {
878 qualifier += " writeonly";
879 }
880
881 code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
882 GetGlobalMemoryBlock(base));
883 code.AddLine(" uint {}[];", GetGlobalMemory(base));
884 code.AddLine("}};");
885 code.AddNewLine();
886 }
887 }
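
// Illustrative emitted block (schematic names): a region both read and written keeps only
// the conservative qualifiers, while a read-only one also gets "readonly":
//   layout (std430, binding = 2) coherent volatile buffer gmem_block_0 { uint gmem0[]; };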
888
889 void DeclareSamplers() {
890 u32 binding = device.GetBaseBindings(stage).sampler;
891 for (const auto& sampler : ir.GetSamplers()) {
892 const std::string name = GetSampler(sampler);
893 const std::string description = fmt::format("layout (binding = {}) uniform", binding);
894 binding += sampler.is_indexed ? sampler.size : 1;
895
896 std::string sampler_type = [&]() {
897 if (sampler.is_buffer) {
898 return "samplerBuffer";
899 }
900 switch (sampler.type) {
901 case TextureType::Texture1D:
902 return "sampler1D";
903 case TextureType::Texture2D:
904 return "sampler2D";
905 case TextureType::Texture3D:
906 return "sampler3D";
907 case TextureType::TextureCube:
908 return "samplerCube";
909 default:
910 UNREACHABLE();
911 return "sampler2D";
912 }
913 }();
914 if (sampler.is_array) {
915 sampler_type += "Array";
916 }
917 if (sampler.is_shadow) {
918 sampler_type += "Shadow";
919 }
920
921 if (!sampler.is_indexed) {
922 code.AddLine("{} {} {};", description, sampler_type, name);
923 } else {
924 code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size);
925 }
926 }
927 if (!ir.GetSamplers().empty()) {
928 code.AddNewLine();
929 }
930 }
931
932 void DeclarePhysicalAttributeReader() {
933 if (!ir.HasPhysicalAttributes()) {
934 return;
935 }
936 code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
937 ++code.scope;
938 code.AddLine("switch (physical_address) {{");
939
940 // Just declare generic attributes for now.
941 const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
942 for (u32 index = 0; index < num_attributes; ++index) {
943 const auto attribute{ToGenericAttribute(index)};
944 for (u32 element = 0; element < 4; ++element) {
945 constexpr u32 generic_base = 0x80;
946 constexpr u32 generic_stride = 16;
947 constexpr u32 element_stride = 4;
948 const u32 address{generic_base + index * generic_stride + element * element_stride};
949
950 const bool declared = stage != ShaderType::Fragment ||
951 header.ps.GetPixelImap(index) != PixelImap::Unused;
952 const std::string value =
953 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
954 code.AddLine("case 0x{:X}U: return {};", address, value);
955 }
956 }
957
958 code.AddLine("default: return 0;");
959
960 code.AddLine("}}");
961 --code.scope;
962 code.AddLine("}}");
963 code.AddNewLine();
964 }
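
// Illustrative shape of the emitted reader (the 0x80 base and strides come from the
// constants above; the attribute expressions are schematic):
//   float ReadPhysicalAttribute(uint physical_address) {
//       switch (physical_address) {
//       case 0x80U: return in_attr0.x;
//       case 0x84U: return in_attr0.y;
//       // ...
//       default: return 0;
//       }
//   }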
965
966 void DeclareImages() {
967 u32 binding = device.GetBaseBindings(stage).image;
968 for (const auto& image : ir.GetImages()) {
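// As with global memory, the image's usage is not statically known, so hint the driver to
// disable optimizations unless the usage is provably read-only or write-only.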
969 std::string qualifier = "coherent volatile";
970 if (image.is_read && !image.is_written) {
971 qualifier += " readonly";
972 } else if (image.is_written && !image.is_read) {
973 qualifier += " writeonly";
974 }
975
976 const char* format = image.is_atomic ? "r32ui, " : "";
977 const char* type_declaration = GetImageTypeDeclaration(image.type);
978 code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
979 qualifier, type_declaration, GetImage(image));
980 }
981 if (!ir.GetImages().empty()) {
982 code.AddNewLine();
983 }
984 }
985
986 void VisitBlock(const NodeBlock& bb) {
987 for (const auto& node : bb) {
988 Visit(node).CheckVoid();
989 }
990 }
991
992 Expression Visit(const Node& node) {
993 if (const auto operation = std::get_if<OperationNode>(&*node)) {
994 if (const auto amend_index = operation->GetAmendIndex()) {
995 Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
996 }
997 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
998 if (operation_index >= operation_decompilers.size()) {
999 UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
1000 return {};
1001 }
1002 const auto decompiler = operation_decompilers[operation_index];
1003 if (decompiler == nullptr) {
1004 UNREACHABLE_MSG("Undefined operation: {}", operation_index);
1005 return {};
1006 }
1007 return (this->*decompiler)(*operation);
1008 }
1009
1010 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1011 const u32 index = gpr->GetIndex();
1012 if (index == Register::ZeroIndex) {
1013 return {"0U", Type::Uint};
1014 }
1015 return {GetRegister(index), Type::Float};
1016 }
1017
1018 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1019 const u32 index = cv->GetIndex();
1020 return {GetCustomVariable(index), Type::Float};
1021 }
1022
1023 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1024 const u32 value = immediate->GetValue();
1025 if (value < 10) {
1026 // For readability, avoid hexadecimal for single-digit values
1027 return {fmt::format("{}U", value), Type::Uint};
1028 }
1029 return {fmt::format("0x{:X}U", value), Type::Uint};
1030 }
1031
1032 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1033 const auto value = [&]() -> std::string {
1034 switch (const auto index = predicate->GetIndex(); index) {
1035 case Tegra::Shader::Pred::UnusedIndex:
1036 return "true";
1037 case Tegra::Shader::Pred::NeverExecute:
1038 return "false";
1039 default:
1040 return GetPredicate(index);
1041 }
1042 }();
1043 if (predicate->IsNegated()) {
1044 return {fmt::format("!({})", value), Type::Bool};
1045 }
1046 return {value, Type::Bool};
1047 }
1048
1049 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1050 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
1051 "Physical attributes in geometry shaders are not implemented");
1052 if (abuf->IsPhysicalBuffer()) {
1053 return {fmt::format("ReadPhysicalAttribute({})",
1054 Visit(abuf->GetPhysicalAddress()).AsUint()),
1055 Type::Float};
1056 }
1057 return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
1058 }
1059
1060 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1061 const Node offset = cbuf->GetOffset();
1062
1063 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1064 // Direct access
1065 const u32 offset_imm = immediate->GetValue();
1066 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
1067 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1068 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1069 Type::Uint};
1070 }
1071
1072 // Indirect access
1073 const std::string final_offset = code.GenerateTemporary();
1074 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
1075
1076 if (!device.HasComponentIndexingBug()) {
1077 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
1078 final_offset, final_offset),
1079 Type::Uint};
1080 }
1081
1082 // AMD's proprietary GLSL compiler emits broken code for variable component access.
1083 // To bypass this driver bug, generate four ifs, one per component.
1084 const std::string pack = code.GenerateTemporary();
1085 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
1086 final_offset);
1087
1088 const std::string result = code.GenerateTemporary();
1089 code.AddLine("uint {};", result);
1090 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
1091 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
1092 GetSwizzle(swizzle));
1093 }
1094 return {result, Type::Uint};
1095 }
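
// Illustrative expansion of the workaround above (temporary names are schematic):
//   uint t0 = offset >> 2;
//   uvec4 t1 = cbuf0[t0 >> 2];
//   uint t2;
//   if ((t0 & 3) == 0) t2 = t1.x;
//   if ((t0 & 3) == 1) t2 = t1.y;
//   if ((t0 & 3) == 2) t2 = t1.z;
//   if ((t0 & 3) == 3) t2 = t1.w;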
1096
1097 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1098 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
1099 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
1100 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
1101 return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
1102 Type::Uint};
1103 }
1104
1105 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1106 return {
1107 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
1108 Type::Uint};
1109 }
1110
1111 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1112 return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
1113 }
1114
1115 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1116 return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
1117 }
1118
1119 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1120 if (const auto amend_index = conditional->GetAmendIndex()) {
1121 Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
1122 }
1123 // It's invalid to call a conditional on nested nodes; use an operation instead
1124 code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
1125 ++code.scope;
1126
1127 VisitBlock(conditional->GetCode());
1128
1129 --code.scope;
1130 code.AddLine("}}");
1131 return {};
1132 }
1133
1134 if (const auto comment = std::get_if<CommentNode>(&*node)) {
1135 code.AddLine("// " + comment->GetText());
1136 return {};
1137 }
1138
1139 UNREACHABLE();
1140 return {};
1141 }
1142
1143 Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
1144 const auto GeometryPass = [&](std::string_view name) {
1145 if (stage == ShaderType::Geometry && buffer) {
1146 // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games
1147 // set a 0x80000000 index for those and the shader fails to build. Find out why
1148 // this happens and what its intent is.
1149 return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
1150 max_input_vertices.value());
1151 }
1152 return std::string(name);
1153 };
1154
1155 switch (attribute) {
1156 case Attribute::Index::Position:
1157 switch (stage) {
1158 case ShaderType::Geometry:
1159 return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
1160 GetSwizzle(element)),
1161 Type::Float};
1162 case ShaderType::Fragment:
1163 return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
1164 default:
1165 UNREACHABLE();
1166 return {"0", Type::Int};
1167 }
1168 case Attribute::Index::FrontColor:
1169 return {"gl_Color"s + GetSwizzle(element), Type::Float};
1170 case Attribute::Index::FrontSecondaryColor:
1171 return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
1172 case Attribute::Index::PointCoord:
1173 switch (element) {
1174 case 0:
1175 return {"gl_PointCoord.x", Type::Float};
1176 case 1:
1177 return {"gl_PointCoord.y", Type::Float};
1178 case 2:
1179 case 3:
1180 return {"0.0f", Type::Float};
1181 }
1182 UNREACHABLE();
1183 return {"0", Type::Int};
1184 case Attribute::Index::TessCoordInstanceIDVertexID:
1185 // TODO(Subv): Find out what the values are for the first two elements when inside a
1186 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
1187 // shader.
1188 ASSERT(stage == ShaderType::Vertex);
1189 switch (element) {
1190 case 2:
1191 // Config pack's first value is instance_id.
1192 return {"gl_InstanceID", Type::Int};
1193 case 3:
1194 return {"gl_VertexID", Type::Int};
1195 }
1196 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1197 return {"0", Type::Int};
1198 case Attribute::Index::FrontFacing:
1199 // TODO(Subv): Find out what the values are for the other elements.
1200 ASSERT(stage == ShaderType::Fragment);
1201 switch (element) {
1202 case 3:
1203 return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
1204 }
1205 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
1206 return {"0", Type::Int};
1207 default:
1208 if (IsGenericAttribute(attribute)) {
1209 return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
1210 Type::Float};
1211 }
1212 if (IsLegacyTexCoord(attribute)) {
1213 UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
1214 return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
1215 GetSwizzle(element)),
1216 Type::Float};
1217 }
1218 break;
1219 }
1220 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
1221 return {"0", Type::Int};
1222 }
1223
1224 Expression ApplyPrecise(Operation operation, std::string value, Type type) {
1225 if (!IsPrecise(operation)) {
1226 return {std::move(value), type};
1227 }
1228 // Old Nvidia drivers have a bug with precise and texture sampling. Texture sampling is
1229 // more common in fragment shaders, so we disable precise there. There are vertex shaders
1230 // that also fail to build, but nobody seems to care about those.
1231 // Note: only buggy drivers will skip precise.
1232 const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
1233
1234 std::string temporary = code.GenerateTemporary();
1235 code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
1236 temporary, value);
1237 return {std::move(temporary), type};
1238 }
1239
1240 Expression VisitOperand(Operation operation, std::size_t operand_index) {
1241 const auto& operand = operation[operand_index];
1242 const bool parent_precise = IsPrecise(operation);
1243 const bool child_precise = IsPrecise(operand);
1244 const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
1245 if (!parent_precise || child_precise || child_trivial) {
1246 return Visit(operand);
1247 }
1248
1249 Expression value = Visit(operand);
1250 std::string temporary = code.GenerateTemporary();
1251 code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
1252 return {std::move(temporary), value.GetType()};
1253 }
1254
1255 std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
1256 const u32 element = abuf->GetElement();
1257 switch (const auto attribute = abuf->GetIndex()) {
1258 case Attribute::Index::Position:
1259 return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
1260 case Attribute::Index::LayerViewportPointSize:
1261 switch (element) {
1262 case 0:
1263 UNIMPLEMENTED();
1264 return std::nullopt;
1265 case 1:
1266 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1267 return std::nullopt;
1268 }
1269 return {{"gl_Layer", Type::Int}};
1270 case 2:
1271 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1272 return std::nullopt;
1273 }
1274 return {{"gl_ViewportIndex", Type::Int}};
1275 case 3:
1276 return {{"gl_PointSize", Type::Float}};
1277 }
1278 return std::nullopt;
1279 case Attribute::Index::FrontColor:
1280 return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
1281 case Attribute::Index::FrontSecondaryColor:
1282 return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
1283 case Attribute::Index::BackColor:
1284 return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
1285 case Attribute::Index::BackSecondaryColor:
1286 return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
1287 case Attribute::Index::ClipDistances0123:
1288 return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
1289 case Attribute::Index::ClipDistances4567:
1290 return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
1291 default:
1292 if (IsGenericAttribute(attribute)) {
1293 return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
1294 }
1295 if (IsLegacyTexCoord(attribute)) {
1296 return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
1297 GetSwizzle(element)),
1298 Type::Float}};
1299 }
1300 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
1301 return std::nullopt;
1302 }
1303 }
1304
1305 Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
1306 Type type_a) {
1307 std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
1308 return ApplyPrecise(operation, std::move(op_str), result_type);
1309 }
1310
1311 Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
1312 Type type_a, Type type_b) {
1313 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1314 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1315 std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
1316
1317 return ApplyPrecise(operation, std::move(op_str), result_type);
1318 }
1319
1320 Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
1321 Type type_a, Type type_b) {
1322 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1323 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1324 std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
1325
1326 return ApplyPrecise(operation, std::move(op_str), result_type);
1327 }
1328
1329 Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
1330 Type type_a, Type type_b, Type type_c) {
1331 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1332 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1333 const std::string op_c = VisitOperand(operation, 2).As(type_c);
1334 std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
1335
1336 return ApplyPrecise(operation, std::move(op_str), result_type);
1337 }
1338
1339 Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
1340 Type type_a, Type type_b, Type type_c, Type type_d) {
1341 const std::string op_a = VisitOperand(operation, 0).As(type_a);
1342 const std::string op_b = VisitOperand(operation, 1).As(type_b);
1343 const std::string op_c = VisitOperand(operation, 2).As(type_c);
1344 const std::string op_d = VisitOperand(operation, 3).As(type_d);
1345 std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
1346
1347 return ApplyPrecise(operation, std::move(op_str), result_type);
1348 }
1349
1350 std::string GenerateTexture(Operation operation, const std::string& function_suffix,
1351 const std::vector<TextureIR>& extras, bool separate_dc = false) {
1352 constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
1353
1354 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1355 ASSERT(meta);
1356
1357 const std::size_t count = operation.GetOperandsCount();
1358 const bool has_array = meta->sampler.is_array;
1359 const bool has_shadow = meta->sampler.is_shadow;
1360 const bool workaround_lod_array_shadow_as_grad =
1361 !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
1362 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
1363 meta->sampler.type == TextureType::TextureCube);
1364
1365 std::string expr = "texture";
1366
1367 if (workaround_lod_array_shadow_as_grad) {
1368 expr += "Grad";
1369 } else {
1370 expr += function_suffix;
1371 }
1372
1373 if (!meta->aoffi.empty()) {
1374 expr += "Offset";
1375 } else if (!meta->ptp.empty()) {
1376 expr += "Offsets";
1377 }
1378 if (!meta->sampler.is_indexed) {
1379 expr += '(' + GetSampler(meta->sampler) + ", ";
1380 } else {
1381 expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
1382 }
1383 expr += coord_constructors.at(count + (has_array ? 1 : 0) +
1384 (has_shadow && !separate_dc ? 1 : 0) - 1);
1385 expr += '(';
1386 for (std::size_t i = 0; i < count; ++i) {
1387 expr += Visit(operation[i]).AsFloat();
1388
1389 const std::size_t next = i + 1;
1390 if (next < count)
1391 expr += ", ";
1392 }
1393 if (has_array) {
1394 expr += ", float(" + Visit(meta->array).AsInt() + ')';
1395 }
1396 if (has_shadow) {
1397 if (separate_dc) {
1398 expr += "), " + Visit(meta->depth_compare).AsFloat();
1399 } else {
1400 expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
1401 }
1402 } else {
1403 expr += ')';
1404 }
1405
1406 if (workaround_lod_array_shadow_as_grad) {
1407 switch (meta->sampler.type) {
1408 case TextureType::Texture2D:
1409 return expr + ", vec2(0.0), vec2(0.0))";
1410 case TextureType::TextureCube:
1411 return expr + ", vec3(0.0), vec3(0.0))";
1412 default:
1413 UNREACHABLE();
1414 break;
1415 }
1416 }
1417
1418 for (const auto& variant : extras) {
1419 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
1420 expr += GenerateTextureArgument(*argument);
1421 } else if (std::holds_alternative<TextureOffset>(variant)) {
1422 if (!meta->aoffi.empty()) {
1423 expr += GenerateTextureAoffi(meta->aoffi);
1424 } else if (!meta->ptp.empty()) {
1425 expr += GenerateTexturePtp(meta->ptp);
1426 }
1427 } else if (std::holds_alternative<TextureDerivates>(variant)) {
1428 expr += GenerateTextureDerivates(meta->derivates);
1429 } else {
1430 UNREACHABLE();
1431 }
1432 }
1433
1434 return expr + ')';
1435 }
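
// Illustrative result (schematic sampler name): a 2D array shadow textureLod on a device
// without GL_EXT_texture_shadow_lod is rewritten to textureGrad with zero derivatives:
//   textureGrad(sampler0, vec4(x, y, float(layer), depth_compare), vec2(0.0), vec2(0.0))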
1436
1437 std::string GenerateTextureArgument(const TextureArgument& argument) {
1438 const auto& [type, operand] = argument;
1439 if (operand == nullptr) {
1440 return {};
1441 }
1442
1443 std::string expr = ", ";
1444 switch (type) {
1445 case Type::Int:
1446 if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
1447 // Inline the value as an immediate integer in GLSL (some extra arguments are
1448 // required to be compile-time constants)
1449 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
1450 } else {
1451 expr += Visit(operand).AsInt();
1452 }
1453 break;
1454 case Type::Float:
1455 expr += Visit(operand).AsFloat();
1456 break;
1457 default: {
1458 const auto type_int = static_cast<u32>(type);
1459 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
1460 expr += '0';
1461 break;
1462 }
1463 }
1464 return expr;
1465 }
1466
1467 std::string ReadTextureOffset(const Node& value) {
1468 if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
1469 // Inline the value as an immediate integer in GLSL (AOFFI arguments are required
1470 // to be constant expressions by the standard).
1471 return std::to_string(static_cast<s32>(immediate->GetValue()));
1472 } else if (device.HasVariableAoffi()) {
1473 // Avoid using variable AOFFI on unsupported devices.
1474 return Visit(value).AsInt();
1475 } else {
1476 // Insert 0 on devices not supporting variable AOFFI.
1477 return "0";
1478 }
1479 }
1480
1481 std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
1482 if (aoffi.empty()) {
1483 return {};
1484 }
1485 constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
1486 std::string expr = ", ";
1487 expr += coord_constructors.at(aoffi.size() - 1);
1488 expr += '(';
1489
1490 for (std::size_t index = 0; index < aoffi.size(); ++index) {
1491 expr += ReadTextureOffset(aoffi.at(index));
1492 if (index + 1 < aoffi.size()) {
1493 expr += ", ";
1494 }
1495 }
1496 expr += ')';
1497
1498 return expr;
1499 }
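
// Example (illustrative): constant offsets <1, -2> append ", ivec2(1, -2)"; on devices
// without variable-AOFFI support, non-immediate components degrade to 0.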
1500
1501 std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
1502 static constexpr std::size_t num_vectors = 4;
1503 ASSERT(ptp.size() == num_vectors * 2);
1504
1505 std::string expr = ", ivec2[](";
1506 for (std::size_t vector = 0; vector < num_vectors; ++vector) {
1507 const bool has_next = vector + 1 < num_vectors;
1508 expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
1509 ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
1510 }
1511 expr += ')';
1512 return expr;
1513 }
1514
1515 std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
1516 if (derivates.empty()) {
1517 return {};
1518 }
1519 constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
1520 std::string expr = ", ";
1521 const std::size_t components = derivates.size() / 2;
1522 std::string dx = coord_constructors.at(components - 1);
1523 std::string dy = coord_constructors.at(components - 1);
1524 dx += '(';
1525 dy += '(';
1526
1527 for (std::size_t index = 0; index < components; ++index) {
1528 const auto& operand_x{derivates.at(index * 2)};
1529 const auto& operand_y{derivates.at(index * 2 + 1)};
1530 dx += Visit(operand_x).AsFloat();
1531 dy += Visit(operand_y).AsFloat();
1532
1533 if (index + 1 < components) {
1534 dx += ", ";
1535 dy += ", ";
1536 }
1537 }
1538 dx += ')';
1539 dy += ')';
1540 expr += dx + ", " + dy;
1541
1542 return expr;
1543 }
1544
1545 std::string BuildIntegerCoordinates(Operation operation) {
1546 constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
1547 const std::size_t coords_count{operation.GetOperandsCount()};
1548 std::string expr = constructors.at(coords_count - 1);
1549 for (std::size_t i = 0; i < coords_count; ++i) {
1550 expr += VisitOperand(operation, i).AsInt();
1551 if (i + 1 < coords_count) {
1552 expr += ", ";
1553 }
1554 }
1555 expr += ')';
1556 return expr;
1557 }
1558
1559 std::string BuildImageValues(Operation operation) {
1560 constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
1561 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1562
1563 const std::size_t values_count{meta.values.size()};
1564 std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
1565 for (std::size_t i = 0; i < values_count; ++i) {
1566 expr += Visit(meta.values.at(i)).AsUint();
1567 if (i + 1 < values_count) {
1568 expr += ", ";
1569 }
1570 }
1571 expr += ')';
1572 return expr;
1573 }
1574
1575 Expression Assign(Operation operation) {
1576 const Node& dest = operation[0];
1577 const Node& src = operation[1];
1578
1579 Expression target;
1580 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1581 if (gpr->GetIndex() == Register::ZeroIndex) {
1582 // Writing to Register::ZeroIndex is a no-op, but we still have to visit the source
1583 // as it might have side effects.
1584 code.AddLine("{};", Visit(src).GetCode());
1585 return {};
1586 }
1587 target = {GetRegister(gpr->GetIndex()), Type::Float};
1588 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1589 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
1590 auto output = GetOutputAttribute(abuf);
1591 if (!output) {
1592 return {};
1593 }
1594 target = std::move(*output);
1595 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1596 target = {
1597 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
1598 Type::Uint};
1599 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1600 ASSERT(stage == ShaderType::Compute);
1601 target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
1602 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1603 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
1604 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
1605 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
1606 target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
1607 Type::Uint};
1608 } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
1609 target = {GetCustomVariable(cv->GetIndex()), Type::Float};
1610 } else {
1611 UNREACHABLE_MSG("Assign called without a proper target");
1612 }
1613
1614 code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
1615 return {};
1616 }
1617
1618 template <Type type>
1619 Expression Add(Operation operation) {
1620 return GenerateBinaryInfix(operation, "+", type, type, type);
1621 }
1622
1623 template <Type type>
1624 Expression Mul(Operation operation) {
1625 return GenerateBinaryInfix(operation, "*", type, type, type);
1626 }
1627
1628 template <Type type>
1629 Expression Div(Operation operation) {
1630 return GenerateBinaryInfix(operation, "/", type, type, type);
1631 }
1632
1633 template <Type type>
1634 Expression Fma(Operation operation) {
1635 return GenerateTernary(operation, "fma", type, type, type, type);
1636 }
1637
1638 template <Type type>
1639 Expression Negate(Operation operation) {
1640 return GenerateUnary(operation, "-", type, type);
1641 }
1642
1643 template <Type type>
1644 Expression Absolute(Operation operation) {
1645 return GenerateUnary(operation, "abs", type, type);
1646 }
1647
1648 Expression FClamp(Operation operation) {
1649 return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
1650 Type::Float);
1651 }
1652
1653 Expression FCastHalf0(Operation operation) {
1654 return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1655 }
1656
1657 Expression FCastHalf1(Operation operation) {
1658 return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1659 }
1660
1661 template <Type type>
1662 Expression Min(Operation operation) {
1663 return GenerateBinaryCall(operation, "min", type, type, type);
1664 }
1665
1666 template <Type type>
1667 Expression Max(Operation operation) {
1668 return GenerateBinaryCall(operation, "max", type, type, type);
1669 }
1670
1671 Expression Select(Operation operation) {
1672 const std::string condition = Visit(operation[0]).AsBool();
1673 const std::string true_case = Visit(operation[1]).AsUint();
1674 const std::string false_case = Visit(operation[2]).AsUint();
1675 std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
1676
1677 return ApplyPrecise(operation, std::move(op_str), Type::Uint);
1678 }
1679
1680 Expression FCos(Operation operation) {
1681 return GenerateUnary(operation, "cos", Type::Float, Type::Float);
1682 }
1683
1684 Expression FSin(Operation operation) {
1685 return GenerateUnary(operation, "sin", Type::Float, Type::Float);
1686 }
1687
1688 Expression FExp2(Operation operation) {
1689 return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
1690 }
1691
1692 Expression FLog2(Operation operation) {
1693 return GenerateUnary(operation, "log2", Type::Float, Type::Float);
1694 }
1695
1696 Expression FInverseSqrt(Operation operation) {
1697 return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
1698 }
1699
1700 Expression FSqrt(Operation operation) {
1701 return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
1702 }
1703
1704 Expression FRoundEven(Operation operation) {
1705 return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
1706 }
1707
1708 Expression FFloor(Operation operation) {
1709 return GenerateUnary(operation, "floor", Type::Float, Type::Float);
1710 }
1711
1712 Expression FCeil(Operation operation) {
1713 return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
1714 }
1715
1716 Expression FTrunc(Operation operation) {
1717 return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
1718 }
1719
1720 template <Type type>
1721 Expression FCastInteger(Operation operation) {
1722 return GenerateUnary(operation, "float", Type::Float, type);
1723 }
1724
1725 Expression FSwizzleAdd(Operation operation) {
1726 const std::string op_a = VisitOperand(operation, 0).AsFloat();
1727 const std::string op_b = VisitOperand(operation, 1).AsFloat();
1728
1729 if (!device.HasShaderBallot()) {
1730 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
1731 return {fmt::format("{} + {}", op_a, op_b), Type::Float};
1732 }
1733
1734 const std::string instr_mask = VisitOperand(operation, 2).AsUint();
1735 const std::string mask = code.GenerateTemporary();
1736 code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
1737 instr_mask);
1738
1739 const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
1740 const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
1741 return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
1742 Type::Float};
1743 }
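
// Worked example (illustrative): each invocation picks its 2-bit modifier pair by quad
// lane. For lane 2 (gl_SubGroupInvocationARB & 3 == 2) the shift is 4, so
// mask = (instr_mask >> 4) & 3 indexes fswzadd_modifiers_a/_b.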
1744
1745 Expression ICastFloat(Operation operation) {
1746 return GenerateUnary(operation, "int", Type::Int, Type::Float);
1747 }
1748
1749 Expression ICastUnsigned(Operation operation) {
1750 return GenerateUnary(operation, "int", Type::Int, Type::Uint);
1751 }
1752
1753 template <Type type>
1754 Expression LogicalShiftLeft(Operation operation) {
1755 return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
1756 }
1757
1758 Expression ILogicalShiftRight(Operation operation) {
1759 const std::string op_a = VisitOperand(operation, 0).AsUint();
1760 const std::string op_b = VisitOperand(operation, 1).AsUint();
1761 std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
1762
1763 return ApplyPrecise(operation, std::move(op_str), Type::Int);
1764 }
1765
1766 Expression IArithmeticShiftRight(Operation operation) {
1767 return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
1768 }
1769
1770 template <Type type>
1771 Expression BitwiseAnd(Operation operation) {
1772 return GenerateBinaryInfix(operation, "&", type, type, type);
1773 }
1774
1775 template <Type type>
1776 Expression BitwiseOr(Operation operation) {
1777 return GenerateBinaryInfix(operation, "|", type, type, type);
1778 }
1779
1780 template <Type type>
1781 Expression BitwiseXor(Operation operation) {
1782 return GenerateBinaryInfix(operation, "^", type, type, type);
1783 }
1784
1785 template <Type type>
1786 Expression BitwiseNot(Operation operation) {
1787 return GenerateUnary(operation, "~", type, type);
1788 }
1789
1790 Expression UCastFloat(Operation operation) {
1791 return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
1792 }
1793
1794 Expression UCastSigned(Operation operation) {
1795 return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
1796 }
1797
1798 Expression UShiftRight(Operation operation) {
1799 return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
1800 }
1801
1802 template <Type type>
1803 Expression BitfieldInsert(Operation operation) {
1804 return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
1805 Type::Int);
1806 }
1807
1808 template <Type type>
1809 Expression BitfieldExtract(Operation operation) {
1810 return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
1811 }
1812
1813 template <Type type>
1814 Expression BitCount(Operation operation) {
1815 return GenerateUnary(operation, "bitCount", type, type);
1816 }
1817
1818 template <Type type>
1819 Expression BitMSB(Operation operation) {
1820 return GenerateUnary(operation, "findMSB", type, type);
1821 }
1822
1823 Expression HNegate(Operation operation) {
1824 const auto GetNegate = [&](std::size_t index) {
1825 return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
1826 };
1827 return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
1828 GetNegate(1), GetNegate(2)),
1829 Type::HalfFloat};
1830 }
1831
1832 Expression HClamp(Operation operation) {
1833 const std::string value = VisitOperand(operation, 0).AsHalfFloat();
1834 const std::string min = VisitOperand(operation, 1).AsFloat();
1835 const std::string max = VisitOperand(operation, 2).AsFloat();
1836 std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
1837
1838 return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
1839 }
1840
1841 Expression HCastFloat(Operation operation) {
1842 return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
1843 Type::HalfFloat};
1844 }
1845
1846 Expression HUnpack(Operation operation) {
1847 Expression operand = VisitOperand(operation, 0);
1848 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1849 case Tegra::Shader::HalfType::H0_H1:
1850 return operand;
1851 case Tegra::Shader::HalfType::F32:
1852 return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
1853 case Tegra::Shader::HalfType::H0_H0:
1854 return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
1855 case Tegra::Shader::HalfType::H1_H1:
1856 return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
1857 }
1858 UNREACHABLE();
1859 return {"0", Type::Int};
1860 }
1861
1862 Expression HMergeF32(Operation operation) {
1863 return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
1864 }
1865
1866 Expression HMergeH0(Operation operation) {
1867 const std::string dest = VisitOperand(operation, 0).AsUint();
1868 const std::string src = VisitOperand(operation, 1).AsUint();
1869 return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
1870 Type::HalfFloat};
1871 }
1872
1873 Expression HMergeH1(Operation operation) {
1874 const std::string dest = VisitOperand(operation, 0).AsUint();
1875 const std::string src = VisitOperand(operation, 1).AsUint();
1876 return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
1877 Type::HalfFloat};
1878 }
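
// Note: HMergeH0 replaces the low half (.x) of dest with src's low half, and HMergeH1
// replaces the high half (.y); hence the swapped unpack order between the two above.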
1879
1880 Expression HPack2(Operation operation) {
1881 return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
1882 VisitOperand(operation, 1).AsFloat()),
1883 Type::HalfFloat};
1884 }
1885
1886 template <const std::string_view& op, Type type, bool unordered = false>
1887 Expression Comparison(Operation operation) {
1888 static_assert(!unordered || type == Type::Float);
1889
1890 Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
1891
1892 if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
1893 // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's
1894 // and Nvidia's proprietary stacks. Manually force an ordered comparison.
1895 return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
1896 VisitOperand(operation, 0).AsFloat(),
1897 VisitOperand(operation, 1).AsFloat()),
1898 Type::Bool};
1899 }
1900 if constexpr (!unordered) {
1901 return expr;
1902 }
1903 // Unordered comparisons are always true for NaN operands.
1904 return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
1905 VisitOperand(operation, 0).AsFloat(),
1906 VisitOperand(operation, 1).AsFloat()),
1907 Type::Bool};
1908 }
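
// Example (illustrative): a float ordered != between r0 and r1 expands to
//   ((r0 != r1) && !isnan(r0) && !isnan(r1))
// while the unordered variants instead OR in the isnan terms.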
1909
1910 Expression FOrdered(Operation operation) {
1911 return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
1912 VisitOperand(operation, 1).AsFloat()),
1913 Type::Bool};
1914 }
1915
1916 Expression FUnordered(Operation operation) {
1917 return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
1918 VisitOperand(operation, 1).AsFloat()),
1919 Type::Bool};
1920 }
1921
1922 Expression LogicalAddCarry(Operation operation) {
1923 const std::string carry = code.GenerateTemporary();
1924 code.AddLine("uint {};", carry);
1925 code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
1926 VisitOperand(operation, 1).AsUint(), carry);
1927 return {fmt::format("({} != 0)", carry), Type::Bool};
1928 }
1929
1930 Expression LogicalAssign(Operation operation) {
1931 const Node& dest = operation[0];
1932 const Node& src = operation[1];
1933
1934 std::string target;
1935
1936 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1937 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1938
1939 const auto index = pred->GetIndex();
1940 switch (index) {
1941 case Tegra::Shader::Pred::NeverExecute:
1942 case Tegra::Shader::Pred::UnusedIndex:
1943 // Writing to these predicates is a no-op
1944 return {};
1945 }
1946 target = GetPredicate(index);
1947 } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
1948 target = GetInternalFlag(flag->GetFlag());
1949 }
1950
1951 code.AddLine("{} = {};", target, Visit(src).AsBool());
1952 return {};
1953 }
1954
1955 Expression LogicalAnd(Operation operation) {
1956 return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
1957 }
1958
1959 Expression LogicalOr(Operation operation) {
1960 return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
1961 }
1962
1963 Expression LogicalXor(Operation operation) {
1964 return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
1965 }
1966
1967 Expression LogicalNegate(Operation operation) {
1968 return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
1969 }
1970
1971 Expression LogicalPick2(Operation operation) {
1972 return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
1973 VisitOperand(operation, 1).AsUint()),
1974 Type::Bool};
1975 }
1976
1977 Expression LogicalAnd2(Operation operation) {
1978 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1979 }
1980
1981 template <bool with_nan>
1982 Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
1983 Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
1984 Type::HalfFloat, Type::HalfFloat);
1985 if constexpr (!with_nan) {
1986 return comparison;
1987 }
1988 return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
1989 VisitOperand(operation, 0).AsHalfFloat(),
1990 VisitOperand(operation, 1).AsHalfFloat()),
1991 Type::Bool2};
1992 }
1993
1994 template <bool with_nan>
1995 Expression Logical2HLessThan(Operation operation) {
1996 return GenerateHalfComparison<with_nan>(operation, "lessThan");
1997 }
1998
1999 template <bool with_nan>
2000 Expression Logical2HEqual(Operation operation) {
2001 return GenerateHalfComparison<with_nan>(operation, "equal");
2002 }
2003
2004 template <bool with_nan>
2005 Expression Logical2HLessEqual(Operation operation) {
2006 return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
2007 }
2008
2009 template <bool with_nan>
2010 Expression Logical2HGreaterThan(Operation operation) {
2011 return GenerateHalfComparison<with_nan>(operation, "greaterThan");
2012 }
2013
2014 template <bool with_nan>
2015 Expression Logical2HNotEqual(Operation operation) {
2016 return GenerateHalfComparison<with_nan>(operation, "notEqual");
2017 }
2018
2019 template <bool with_nan>
2020 Expression Logical2HGreaterEqual(Operation operation) {
2021 return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
2022 }
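
// Sketch of the HalfFloatNanComparison helper these wrappers rely on (emitted
// elsewhere in this file; exact body assumed here): it ORs each comparison lane
// with a per-component NaN test, roughly
//     bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
//         bvec2 is_nan1 = isnan(pair1);
//         bvec2 is_nan2 = isnan(pair2);
//         return bvec2(comparison.x || is_nan1.x || is_nan2.x,
//                      comparison.y || is_nan1.y || is_nan2.y);
//     }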
2023
2024 Expression Texture(Operation operation) {
2025 const auto meta = std::get<MetaTexture>(operation.GetMeta());
2026 const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
2027 meta.sampler.is_array && meta.sampler.is_shadow;
2028 // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
2029 const std::vector<TextureIR> extras{
2030 TextureOffset{},
2031 TextureArgument{Type::Float, meta.bias},
2032 };
2033 std::string expr = GenerateTexture(operation, "", extras, separate_dc);
2034 if (meta.sampler.is_shadow) {
2035 expr = fmt::format("vec4({})", expr);
2036 }
2037 return {expr + GetSwizzle(meta.element), Type::Float};
2038 }
2039
2040 Expression TextureLod(Operation operation) {
2041 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2042 ASSERT(meta);
2043
2044 std::string expr{};
2045
2046 if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
2047 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
2048 meta->sampler.type == TextureType::TextureCube)) {
2049 LOG_ERROR(Render_OpenGL,
2050 "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
2051 expr = GenerateTexture(operation, "Lod", {});
2052 } else {
2053 expr = GenerateTexture(operation, "Lod",
2054 {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
2055 }
2056
2057 if (meta->sampler.is_shadow) {
2058 expr = "vec4(" + expr + ')';
2059 }
2060 return {expr + GetSwizzle(meta->element), Type::Float};
2061 }
2062
2063 Expression TextureGather(Operation operation) {
2064 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
2065
2066 const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
2067 const bool separate_dc = meta.sampler.is_shadow;
2068
2069 std::vector<TextureIR> ir_;
2070 if (meta.sampler.is_shadow) {
2071 ir_ = {TextureOffset{}};
2072 } else {
2073 ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
2074 }
2075 return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
2076 Type::Float};
2077 }
2078
2079 Expression TextureQueryDimensions(Operation operation) {
2080 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2081 ASSERT(meta);
2082
2083 const std::string sampler = GetSampler(meta->sampler);
2084 const std::string lod = VisitOperand(operation, 0).AsInt();
2085
2086 switch (meta->element) {
2087 case 0:
2088 case 1:
2089 return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
2090 Type::Int};
2091 case 3:
2092 return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
2093 }
2094 UNREACHABLE();
2095 return {"0", Type::Int};
2096 }
2097
2098 Expression TextureQueryLod(Operation operation) {
2099 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2100 ASSERT(meta);
2101
2102 if (meta->element < 2) {
2103 return {fmt::format("int(({} * vec2(256)){})",
2104 GenerateTexture(operation, "QueryLod", {}),
2105 GetSwizzle(meta->element)),
2106 Type::Int};
2107 }
2108 return {"0", Type::Int};
2109 }
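
// The "* vec2(256)" scale converts textureQueryLod's floating-point LOD into a
// fixed-point integer; 256 = 1 << 8 suggests the guest expects 8 fractional bits
// (an inference from the scale factor, not documented in this file).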
2110
2111 Expression TexelFetch(Operation operation) {
2112 constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
2113 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2114 ASSERT(meta);
2115 UNIMPLEMENTED_IF(meta->sampler.is_array);
2116 const std::size_t count = operation.GetOperandsCount();
2117
2118 std::string expr = "texelFetch(";
2119 expr += GetSampler(meta->sampler);
2120 expr += ", ";
2121
2122 expr += constructors.at(count + (meta->array ? 1 : 0) - 1);
2123 expr += '(';
2124 for (std::size_t i = 0; i < count; ++i) {
2125 if (i > 0) {
2126 expr += ", ";
2127 }
2128 expr += VisitOperand(operation, i).AsInt();
2129 }
2130 if (meta->array) {
2131 expr += ", ";
2132 expr += Visit(meta->array).AsInt();
2133 }
2134 expr += ')';
2135
2136 if (meta->lod && !meta->sampler.is_buffer) {
2137 expr += ", ";
2138 expr += Visit(meta->lod).AsInt();
2139 }
2140 expr += ')';
2141 expr += GetSwizzle(meta->element);
2142
2143 return {std::move(expr), Type::Float};
2144 }
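
// For illustration, with a hypothetical non-array 2D sampler "sampler0", integer
// coordinate operands x and y, an LOD node and element 0, the string built above
// comes out roughly as
//     texelFetch(sampler0, ivec2(x, y), lod).x
// Buffer samplers omit the LOD argument because texelFetch(samplerBuffer, int)
// takes none.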
2145
2146 Expression TextureGradient(Operation operation) {
2147 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
2148 std::string expr =
2149 GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
2150 return {std::move(expr) + GetSwizzle(meta.element), Type::Float};
2151 }
2152
2153 Expression ImageLoad(Operation operation) {
2154 if (!device.HasImageLoadFormatted()) {
2155 LOG_ERROR(Render_OpenGL,
2156 "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
2157 return {"0", Type::Int};
2158 }
2159
2160 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2161 return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
2162 BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
2163 Type::Uint};
2164 }
2165
2166 Expression ImageStore(Operation operation) {
2167 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2168 code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
2169 BuildIntegerCoordinates(operation), BuildImageValues(operation));
2170 return {};
2171 }
2172
2173 template <const std::string_view& opname>
2174 Expression AtomicImage(Operation operation) {
2175 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2176 ASSERT(meta.values.size() == 1);
2177
2178 return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
2179 BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()),
2180 Type::Uint};
2181 }
2182
2183 template <const std::string_view& opname, Type type>
2184 Expression Atomic(Operation operation) {
2185 if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
2186 UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
2187 return {};
2188 }
2189 return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
2190 Visit(operation[1]).AsUint()),
2191 Type::Uint};
2192 }
2193
2194 template <const std::string_view& opname, Type type>
2195 Expression Reduce(Operation operation) {
2196 code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
2197 return {};
2198 }
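
// For illustration, with a hypothetical global-memory slot as operation[0]: Atomic
// yields an expression such as
//     atomicAdd(gmem_0_40[0], value)
// and Reduce emits the same call as a standalone statement, discarding the
// returned previous value.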
2199
2200 Expression Branch(Operation operation) {
2201 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
2202 UNIMPLEMENTED_IF(!target);
2203
2204 code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
2205 code.AddLine("break;");
2206 return {};
2207 }
2208
2209 Expression BranchIndirect(Operation operation) {
2210 const std::string op_a = VisitOperand(operation, 0).AsUint();
2211
2212 code.AddLine("jmp_to = {};", op_a);
2213 code.AddLine("break;");
2214 return {};
2215 }
2216
2217 Expression PushFlowStack(Operation operation) {
2218 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2219 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
2220 UNIMPLEMENTED_IF(!target);
2221
2222 code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
2223 target->GetValue());
2224 return {};
2225 }
2226
2227 Expression PopFlowStack(Operation operation) {
2228 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
2229 code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
2230 code.AddLine("break;");
2231 return {};
2232 }
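
// Sketch of the dispatch loop these jumps target (emitted by the non-decompiled
// control-flow path elsewhere in this file; exact shape assumed here):
//     uint jmp_to = 0x0U;
//     while (true) {
//         switch (jmp_to) {
//         case 0x0U: /* block code */ jmp_to = 0x10U; break;
//         case 0x10U: /* block code */ return;
//         }
//     }
// "break" leaves the switch and the loop re-enters it at the new jmp_to target.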
2233
2234 void PreExit() {
2235 if (stage != ShaderType::Fragment) {
2236 return;
2237 }
2238 const auto& used_registers = ir.GetRegisters();
2239 const auto SafeGetRegister = [&](u32 reg) -> Expression {
2240 // TODO(Rodrigo): Replace with contains once C++20 releases
2241 if (used_registers.find(reg) != used_registers.end()) {
2242 return {GetRegister(reg), Type::Float};
2243 }
2244 return {"0.0f", Type::Float};
2245 };
2246
2247 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
2248
2249 // Write the color outputs using the data in the shader registers; disabled
2250 // render targets/components are skipped in the register assignment.
2251 u32 current_reg = 0;
2252 for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
2253 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
2254 for (u32 component = 0; component < 4; ++component) {
2255 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
2256 code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
2257 SafeGetRegister(current_reg).AsFloat());
2258 ++current_reg;
2259 }
2260 }
2261 }
2262 if (header.ps.omap.depth) {
2263 // The depth output is always 2 registers after the last color output, and current_reg
2264 // already contains one past the last color register.
2265 code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
2266 }
2267 }
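
// For illustration, for a hypothetical fragment shader writing RGBA to render
// target 0 with the depth output enabled (and an empty suffix), PreExit emits
// something like
//     frag_color0.r = gpr0;
//     frag_color0.g = gpr1;
//     frag_color0.b = gpr2;
//     frag_color0.a = gpr3;
//     gl_FragDepth = gpr5;
// Disabled components advance no registers and emit nothing.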
2268
2269 Expression Exit(Operation operation) {
2270 PreExit();
2271 code.AddLine("return;");
2272 return {};
2273 }
2274
2275 Expression Discard(Operation operation) {
2276 // Enclose "discard" in a conditional, so that GLSL compilation does not complain
2277 // about unexecuted instructions that may follow this.
2278 code.AddLine("if (true) {{");
2279 ++code.scope;
2280 code.AddLine("discard;");
2281 --code.scope;
2282 code.AddLine("}}");
2283 return {};
2284 }
2285
2286 Expression EmitVertex(Operation operation) {
2287 ASSERT_MSG(stage == ShaderType::Geometry,
2288 "EmitVertex is expected to be used in a geometry shader.");
2289 code.AddLine("EmitVertex();");
2290 return {};
2291 }
2292
2293 Expression EndPrimitive(Operation operation) {
2294 ASSERT_MSG(stage == ShaderType::Geometry,
2295 "EndPrimitive is expected to be used in a geometry shader.");
2296 code.AddLine("EndPrimitive();");
2297 return {};
2298 }
2299
2300 Expression InvocationId(Operation operation) {
2301 return {"gl_InvocationID", Type::Int};
2302 }
2303
2304 Expression YNegate(Operation operation) {
2305 // Y_NEGATE is mapped to this uniform value
2306 return {"gl_FrontMaterial.ambient.a", Type::Float};
2307 }
2308
2309 template <u32 element>
2310 Expression LocalInvocationId(Operation) {
2311 return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
2312 }
2313
2314 template <u32 element>
2315 Expression WorkGroupId(Operation) {
2316 return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
2317 }
2318
2319 Expression BallotThread(Operation operation) {
2320 const std::string value = VisitOperand(operation, 0).AsBool();
2321 if (!device.HasWarpIntrinsics()) {
2322 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2323 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
2324 // one.
2325 return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
2326 }
2327 return {fmt::format("ballotThreadNV({})", value), Type::Uint};
2328 }
2329
2330 Expression Vote(Operation operation, const char* func) {
2331 const std::string value = VisitOperand(operation, 0).AsBool();
2332 if (!device.HasWarpIntrinsics()) {
2333 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2334 // Stub with a warp size of one.
2335 return {value, Type::Bool};
2336 }
2337 return {fmt::format("{}({})", func, value), Type::Bool};
2338 }
2339
2340 Expression VoteAll(Operation operation) {
2341 return Vote(operation, "allThreadsNV");
2342 }
2343
2344 Expression VoteAny(Operation operation) {
2345 return Vote(operation, "anyThreadNV");
2346 }
2347
2348 Expression VoteEqual(Operation operation) {
2349 if (!device.HasWarpIntrinsics()) {
2350 LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
2351 // We must return true here: this is a stub for a theoretical warp size of 1,
2352 // and a single thread always produces an equal result across its own vote.
2353 return {"true", Type::Bool};
2354 }
2355 return Vote(operation, "allThreadsEqualNV");
2356 }
2357
2358 Expression ThreadId(Operation operation) {
2359 if (!device.HasShaderBallot()) {
2360 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
2361 return {"0U", Type::Uint};
2362 }
2363 return {"gl_SubGroupInvocationARB", Type::Uint};
2364 }
2365
2366 template <const std::string_view& comparison>
2367 Expression ThreadMask(Operation) {
2368 if (device.HasWarpIntrinsics()) {
2369 return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
2370 }
2371 if (device.HasShaderBallot()) {
2372 return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
2373 }
2374 LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
2375 return {"0U", Type::Uint};
2376 }
2377
2378 Expression ShuffleIndexed(Operation operation) {
2379 std::string value = VisitOperand(operation, 0).AsFloat();
2380
2381 if (!device.HasShaderBallot()) {
2382 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
2383 return {std::move(value), Type::Float};
2384 }
2385
2386 const std::string index = VisitOperand(operation, 1).AsUint();
2387 return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
2388 }
2389
2390 Expression Barrier(Operation) {
2391 if (!ir.IsDecompiled()) {
2392 LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
2393 return {};
2394 }
2395 code.AddLine("barrier();");
2396 return {};
2397 }
2398
2399 Expression MemoryBarrierGroup(Operation) {
2400 code.AddLine("groupMemoryBarrier();");
2401 return {};
2402 }
2403
2404 Expression MemoryBarrierGlobal(Operation) {
2405 code.AddLine("memoryBarrier();");
2406 return {};
2407 }
2408
2409 struct Func final {
2410 Func() = delete;
2411 ~Func() = delete;
2412
2413 static constexpr std::string_view LessThan = "<";
2414 static constexpr std::string_view Equal = "==";
2415 static constexpr std::string_view LessEqual = "<=";
2416 static constexpr std::string_view GreaterThan = ">";
2417 static constexpr std::string_view NotEqual = "!=";
2418 static constexpr std::string_view GreaterEqual = ">=";
2419
2420 static constexpr std::string_view Eq = "Eq";
2421 static constexpr std::string_view Ge = "Ge";
2422 static constexpr std::string_view Gt = "Gt";
2423 static constexpr std::string_view Le = "Le";
2424 static constexpr std::string_view Lt = "Lt";
2425
2426 static constexpr std::string_view Add = "Add";
2427 static constexpr std::string_view Min = "Min";
2428 static constexpr std::string_view Max = "Max";
2429 static constexpr std::string_view And = "And";
2430 static constexpr std::string_view Or = "Or";
2431 static constexpr std::string_view Xor = "Xor";
2432 static constexpr std::string_view Exchange = "Exchange";
2433 };
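
// These are static members with linkage rather than local constants because
// Comparison, Atomic, Reduce, AtomicImage and ThreadMask take the string by
// reference as a non-type template parameter, e.g.
//     template <const std::string_view& op, Type type, bool unordered = false>
//     Expression Comparison(Operation operation);
// which requires an object whose address is a valid constant expression, as in
// the Comparison<Func::LessThan, Type::Float, false> instantiations below.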
2434
2435 static constexpr std::array operation_decompilers = {
2436 &GLSLDecompiler::Assign,
2437
2438 &GLSLDecompiler::Select,
2439
2440 &GLSLDecompiler::Add<Type::Float>,
2441 &GLSLDecompiler::Mul<Type::Float>,
2442 &GLSLDecompiler::Div<Type::Float>,
2443 &GLSLDecompiler::Fma<Type::Float>,
2444 &GLSLDecompiler::Negate<Type::Float>,
2445 &GLSLDecompiler::Absolute<Type::Float>,
2446 &GLSLDecompiler::FClamp,
2447 &GLSLDecompiler::FCastHalf0,
2448 &GLSLDecompiler::FCastHalf1,
2449 &GLSLDecompiler::Min<Type::Float>,
2450 &GLSLDecompiler::Max<Type::Float>,
2451 &GLSLDecompiler::FCos,
2452 &GLSLDecompiler::FSin,
2453 &GLSLDecompiler::FExp2,
2454 &GLSLDecompiler::FLog2,
2455 &GLSLDecompiler::FInverseSqrt,
2456 &GLSLDecompiler::FSqrt,
2457 &GLSLDecompiler::FRoundEven,
2458 &GLSLDecompiler::FFloor,
2459 &GLSLDecompiler::FCeil,
2460 &GLSLDecompiler::FTrunc,
2461 &GLSLDecompiler::FCastInteger<Type::Int>,
2462 &GLSLDecompiler::FCastInteger<Type::Uint>,
2463 &GLSLDecompiler::FSwizzleAdd,
2464
2465 &GLSLDecompiler::Add<Type::Int>,
2466 &GLSLDecompiler::Mul<Type::Int>,
2467 &GLSLDecompiler::Div<Type::Int>,
2468 &GLSLDecompiler::Negate<Type::Int>,
2469 &GLSLDecompiler::Absolute<Type::Int>,
2470 &GLSLDecompiler::Min<Type::Int>,
2471 &GLSLDecompiler::Max<Type::Int>,
2472
2473 &GLSLDecompiler::ICastFloat,
2474 &GLSLDecompiler::ICastUnsigned,
2475 &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
2476 &GLSLDecompiler::ILogicalShiftRight,
2477 &GLSLDecompiler::IArithmeticShiftRight,
2478 &GLSLDecompiler::BitwiseAnd<Type::Int>,
2479 &GLSLDecompiler::BitwiseOr<Type::Int>,
2480 &GLSLDecompiler::BitwiseXor<Type::Int>,
2481 &GLSLDecompiler::BitwiseNot<Type::Int>,
2482 &GLSLDecompiler::BitfieldInsert<Type::Int>,
2483 &GLSLDecompiler::BitfieldExtract<Type::Int>,
2484 &GLSLDecompiler::BitCount<Type::Int>,
2485 &GLSLDecompiler::BitMSB<Type::Int>,
2486
2487 &GLSLDecompiler::Add<Type::Uint>,
2488 &GLSLDecompiler::Mul<Type::Uint>,
2489 &GLSLDecompiler::Div<Type::Uint>,
2490 &GLSLDecompiler::Min<Type::Uint>,
2491 &GLSLDecompiler::Max<Type::Uint>,
2492 &GLSLDecompiler::UCastFloat,
2493 &GLSLDecompiler::UCastSigned,
2494 &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
2495 &GLSLDecompiler::UShiftRight,
2496 &GLSLDecompiler::UShiftRight,
2497 &GLSLDecompiler::BitwiseAnd<Type::Uint>,
2498 &GLSLDecompiler::BitwiseOr<Type::Uint>,
2499 &GLSLDecompiler::BitwiseXor<Type::Uint>,
2500 &GLSLDecompiler::BitwiseNot<Type::Uint>,
2501 &GLSLDecompiler::BitfieldInsert<Type::Uint>,
2502 &GLSLDecompiler::BitfieldExtract<Type::Uint>,
2503 &GLSLDecompiler::BitCount<Type::Uint>,
2504 &GLSLDecompiler::BitMSB<Type::Uint>,
2505
2506 &GLSLDecompiler::Add<Type::HalfFloat>,
2507 &GLSLDecompiler::Mul<Type::HalfFloat>,
2508 &GLSLDecompiler::Fma<Type::HalfFloat>,
2509 &GLSLDecompiler::Absolute<Type::HalfFloat>,
2510 &GLSLDecompiler::HNegate,
2511 &GLSLDecompiler::HClamp,
2512 &GLSLDecompiler::HCastFloat,
2513 &GLSLDecompiler::HUnpack,
2514 &GLSLDecompiler::HMergeF32,
2515 &GLSLDecompiler::HMergeH0,
2516 &GLSLDecompiler::HMergeH1,
2517 &GLSLDecompiler::HPack2,
2518
2519 &GLSLDecompiler::LogicalAssign,
2520 &GLSLDecompiler::LogicalAnd,
2521 &GLSLDecompiler::LogicalOr,
2522 &GLSLDecompiler::LogicalXor,
2523 &GLSLDecompiler::LogicalNegate,
2524 &GLSLDecompiler::LogicalPick2,
2525 &GLSLDecompiler::LogicalAnd2,
2526
2527 &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
2528 &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
2529 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
2530 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
2531 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
2532 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
2533 &GLSLDecompiler::FOrdered,
2534 &GLSLDecompiler::FUnordered,
2535 &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
2536 &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
2537 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
2538 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
2539 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
2540 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
2541
2542 &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
2543 &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
2544 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
2545 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
2546 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
2547 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
2548
2549 &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
2550 &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
2551 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
2552 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
2553 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
2554 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
2555
2556 &GLSLDecompiler::LogicalAddCarry,
2557
2558 &GLSLDecompiler::Logical2HLessThan<false>,
2559 &GLSLDecompiler::Logical2HEqual<false>,
2560 &GLSLDecompiler::Logical2HLessEqual<false>,
2561 &GLSLDecompiler::Logical2HGreaterThan<false>,
2562 &GLSLDecompiler::Logical2HNotEqual<false>,
2563 &GLSLDecompiler::Logical2HGreaterEqual<false>,
2564 &GLSLDecompiler::Logical2HLessThan<true>,
2565 &GLSLDecompiler::Logical2HEqual<true>,
2566 &GLSLDecompiler::Logical2HLessEqual<true>,
2567 &GLSLDecompiler::Logical2HGreaterThan<true>,
2568 &GLSLDecompiler::Logical2HNotEqual<true>,
2569 &GLSLDecompiler::Logical2HGreaterEqual<true>,
2570
2571 &GLSLDecompiler::Texture,
2572 &GLSLDecompiler::TextureLod,
2573 &GLSLDecompiler::TextureGather,
2574 &GLSLDecompiler::TextureQueryDimensions,
2575 &GLSLDecompiler::TextureQueryLod,
2576 &GLSLDecompiler::TexelFetch,
2577 &GLSLDecompiler::TextureGradient,
2578
2579 &GLSLDecompiler::ImageLoad,
2580 &GLSLDecompiler::ImageStore,
2581
2582 &GLSLDecompiler::AtomicImage<Func::Add>,
2583 &GLSLDecompiler::AtomicImage<Func::And>,
2584 &GLSLDecompiler::AtomicImage<Func::Or>,
2585 &GLSLDecompiler::AtomicImage<Func::Xor>,
2586 &GLSLDecompiler::AtomicImage<Func::Exchange>,
2587
2588 &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
2589 &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
2590 &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
2591 &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
2592 &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
2593 &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
2594 &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,
2595
2596 &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
2597 &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
2598 &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
2599 &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
2600 &GLSLDecompiler::Atomic<Func::And, Type::Int>,
2601 &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
2602 &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
2603
2604 &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
2605 &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
2606 &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
2607 &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
2608 &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
2609 &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
2610
2611 &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
2612 &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
2613 &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
2614 &GLSLDecompiler::Reduce<Func::And, Type::Int>,
2615 &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
2616 &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
2617
2618 &GLSLDecompiler::Branch,
2619 &GLSLDecompiler::BranchIndirect,
2620 &GLSLDecompiler::PushFlowStack,
2621 &GLSLDecompiler::PopFlowStack,
2622 &GLSLDecompiler::Exit,
2623 &GLSLDecompiler::Discard,
2624
2625 &GLSLDecompiler::EmitVertex,
2626 &GLSLDecompiler::EndPrimitive,
2627
2628 &GLSLDecompiler::InvocationId,
2629 &GLSLDecompiler::YNegate,
2630 &GLSLDecompiler::LocalInvocationId<0>,
2631 &GLSLDecompiler::LocalInvocationId<1>,
2632 &GLSLDecompiler::LocalInvocationId<2>,
2633 &GLSLDecompiler::WorkGroupId<0>,
2634 &GLSLDecompiler::WorkGroupId<1>,
2635 &GLSLDecompiler::WorkGroupId<2>,
2636
2637 &GLSLDecompiler::BallotThread,
2638 &GLSLDecompiler::VoteAll,
2639 &GLSLDecompiler::VoteAny,
2640 &GLSLDecompiler::VoteEqual,
2641
2642 &GLSLDecompiler::ThreadId,
2643 &GLSLDecompiler::ThreadMask<Func::Eq>,
2644 &GLSLDecompiler::ThreadMask<Func::Ge>,
2645 &GLSLDecompiler::ThreadMask<Func::Gt>,
2646 &GLSLDecompiler::ThreadMask<Func::Le>,
2647 &GLSLDecompiler::ThreadMask<Func::Lt>,
2648 &GLSLDecompiler::ShuffleIndexed,
2649
2650 &GLSLDecompiler::Barrier,
2651 &GLSLDecompiler::MemoryBarrierGroup,
2652 &GLSLDecompiler::MemoryBarrierGlobal,
2653 };
2654 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
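
// Sketch of how this table is consumed (the caller lives earlier in this file;
// exact body assumed here):
//     const std::size_t index = static_cast<std::size_t>(operation.GetCode());
//     const auto decompiler = operation_decompilers[index];
//     return (this->*decompiler)(operation);
// Entry order must therefore match OperationCode exactly, which is what the
// static_assert above guards.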
2655
2656 std::string GetRegister(u32 index) const {
2657 return AppendSuffix(index, "gpr");
2658 }
2659
2660 std::string GetCustomVariable(u32 index) const {
2661 return AppendSuffix(index, "custom_var");
2662 }
2663
2664 std::string GetPredicate(Tegra::Shader::Pred pred) const {
2665 return AppendSuffix(static_cast<u32>(pred), "pred");
2666 }
2667
2668 std::string GetGenericInputAttribute(Attribute::Index attribute) const {
2669 return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
2670 }
2671
2672 std::unordered_map<u8, GenericVaryingDescription> varying_description;
2673
2674 std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
2675 const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
2676 const auto& description = varying_description.at(offset);
2677 if (description.is_scalar) {
2678 return description.name;
2679 }
2680 return fmt::format("{}[{}]", description.name, element - description.first_element);
2681 }
2682
2683 std::string GetConstBuffer(u32 index) const {
2684 return AppendSuffix(index, "cbuf");
2685 }
2686
2687 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
2688 return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
2689 }
2690
2691 std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
2692 return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
2693 suffix);
2694 }
2695
2696 std::string GetConstBufferBlock(u32 index) const {
2697 return AppendSuffix(index, "cbuf_block");
2698 }
2699
2700 std::string GetLocalMemory() const {
2701 if (suffix.empty()) {
2702 return "lmem";
2703 } else {
2704 return "lmem_" + std::string{suffix};
2705 }
2706 }
2707
2708 std::string GetInternalFlag(InternalFlag flag) const {
2709 constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
2710 "overflow_flag"};
2711 const auto index = static_cast<u32>(flag);
2712 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
2713
2714 if (suffix.empty()) {
2715 return InternalFlagNames[index];
2716 } else {
2717 return fmt::format("{}_{}", InternalFlagNames[index], suffix);
2718 }
2719 }
2720
2721 std::string GetSampler(const SamplerEntry& sampler) const {
2722 return AppendSuffix(sampler.index, "sampler");
2723 }
2724
2725 std::string GetImage(const ImageEntry& image) const {
2726 return AppendSuffix(image.index, "image");
2727 }
2728
2729 std::string AppendSuffix(u32 index, std::string_view name) const {
2730 if (suffix.empty()) {
2731 return fmt::format("{}{}", name, index);
2732 } else {
2733 return fmt::format("{}{}_{}", name, index, suffix);
2734 }
2735 }
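
// For illustration: with suffix "vs", GetRegister(3) yields "gpr3_vs" and
// GetConstBuffer(1) yields "cbuf1_vs"; with an empty suffix they are "gpr3" and
// "cbuf1". The suffix keeps names unique when several stages land in one program.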
2736
2737 u32 GetNumPhysicalInputAttributes() const {
2738 return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
2739 }
2740
2741 u32 GetNumPhysicalAttributes() const {
2742 return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
2743 }
2744
2745 u32 GetNumPhysicalVaryings() const {
2746 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
2747 }
2748
2749 const Device& device;
2750 const ShaderIR& ir;
2751 const Registry& registry;
2752 const ShaderType stage;
2753 const std::string_view identifier;
2754 const std::string_view suffix;
2755 const Header header;
2756 std::unordered_map<u8, VaryingTFB> transform_feedback;
2757
2758 ShaderWriter code;
2759
2760 std::optional<u32> max_input_vertices;
2761};
2762
2763std::string GetFlowVariable(u32 index) {
2764 return fmt::format("flow_var{}", index);
2765}
2766
2767class ExprDecompiler {
2768public:
2769 explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2770
2771 void operator()(const ExprAnd& expr) {
2772 inner += '(';
2773 std::visit(*this, *expr.operand1);
2774 inner += " && ";
2775 std::visit(*this, *expr.operand2);
2776 inner += ')';
2777 }
2778
2779 void operator()(const ExprOr& expr) {
2780 inner += '(';
2781 std::visit(*this, *expr.operand1);
2782 inner += " || ";
2783 std::visit(*this, *expr.operand2);
2784 inner += ')';
2785 }
2786
2787 void operator()(const ExprNot& expr) {
2788 inner += '!';
2789 std::visit(*this, *expr.operand1);
2790 }
2791
2792 void operator()(const ExprPredicate& expr) {
2793 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
2794 inner += decomp.GetPredicate(pred);
2795 }
2796
2797 void operator()(const ExprCondCode& expr) {
2798 inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
2799 }
2800
2801 void operator()(const ExprVar& expr) {
2802 inner += GetFlowVariable(expr.var_index);
2803 }
2804
2805 void operator()(const ExprBoolean& expr) {
2806 inner += expr.value ? "true" : "false";
2807 }
2808
2809 void operator()(const VideoCommon::Shader::ExprGprEqual& expr) {
2810 inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
2811 }
2812
2813 const std::string& GetResult() const {
2814 return inner;
2815 }
2816
2817private:
2818 GLSLDecompiler& decomp;
2819 std::string inner;
2820};
2821
2822class ASTDecompiler {
2823public:
2824 explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
2825
2826 void operator()(const ASTProgram& ast) {
2827 ASTNode current = ast.nodes.GetFirst();
2828 while (current) {
2829 Visit(current);
2830 current = current->GetNext();
2831 }
2832 }
2833
2834 void operator()(const ASTIfThen& ast) {
2835 ExprDecompiler expr_parser{decomp};
2836 std::visit(expr_parser, *ast.condition);
2837 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2838 decomp.code.scope++;
2839 ASTNode current = ast.nodes.GetFirst();
2840 while (current) {
2841 Visit(current);
2842 current = current->GetNext();
2843 }
2844 decomp.code.scope--;
2845 decomp.code.AddLine("}}");
2846 }
2847
2848 void operator()(const ASTIfElse& ast) {
2849 decomp.code.AddLine("else {{");
2850 decomp.code.scope++;
2851 ASTNode current = ast.nodes.GetFirst();
2852 while (current) {
2853 Visit(current);
2854 current = current->GetNext();
2855 }
2856 decomp.code.scope--;
2857 decomp.code.AddLine("}}");
2858 }
2859
2860 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
2861 UNREACHABLE();
2862 }
2863
2864 void operator()(const ASTBlockDecoded& ast) {
2865 decomp.VisitBlock(ast.nodes);
2866 }
2867
2868 void operator()(const ASTVarSet& ast) {
2869 ExprDecompiler expr_parser{decomp};
2870 std::visit(expr_parser, *ast.condition);
2871 decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
2872 }
2873
2874 void operator()(const ASTLabel& ast) {
2875 decomp.code.AddLine("// Label_{}:", ast.index);
2876 }
2877
2878 void operator()([[maybe_unused]] const ASTGoto& ast) {
2879 UNREACHABLE();
2880 }
2881
2882 void operator()(const ASTDoWhile& ast) {
2883 ExprDecompiler expr_parser{decomp};
2884 std::visit(expr_parser, *ast.condition);
2885 decomp.code.AddLine("do {{");
2886 decomp.code.scope++;
2887 ASTNode current = ast.nodes.GetFirst();
2888 while (current) {
2889 Visit(current);
2890 current = current->GetNext();
2891 }
2892 decomp.code.scope--;
2893 decomp.code.AddLine("}} while({});", expr_parser.GetResult());
2894 }
2895
2896 void operator()(const ASTReturn& ast) {
2897 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2898 if (!is_true) {
2899 ExprDecompiler expr_parser{decomp};
2900 std::visit(expr_parser, *ast.condition);
2901 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2902 decomp.code.scope++;
2903 }
2904 if (ast.kills) {
2905 decomp.code.AddLine("discard;");
2906 } else {
2907 decomp.PreExit();
2908 decomp.code.AddLine("return;");
2909 }
2910 if (!is_true) {
2911 decomp.code.scope--;
2912 decomp.code.AddLine("}}");
2913 }
2914 }
2915
2916 void operator()(const ASTBreak& ast) {
2917 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2918 if (!is_true) {
2919 ExprDecompiler expr_parser{decomp};
2920 std::visit(expr_parser, *ast.condition);
2921 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
2922 decomp.code.scope++;
2923 }
2924 decomp.code.AddLine("break;");
2925 if (!is_true) {
2926 decomp.code.scope--;
2927 decomp.code.AddLine("}}");
2928 }
2929 }
2930
2931 void Visit(const ASTNode& node) {
2932 std::visit(*this, *node->GetInnerData());
2933 }
2934
2935private:
2936 GLSLDecompiler& decomp;
2937};
2938
2939void GLSLDecompiler::DecompileAST() {
2940 const u32 num_flow_variables = ir.GetASTNumVariables();
2941 for (u32 i = 0; i < num_flow_variables; i++) {
2942 code.AddLine("bool {} = false;", GetFlowVariable(i));
2943 }
2944
2945 ASTDecompiler decompiler{*this};
2946 decompiler.Visit(ir.GetASTProgram());
2947}
2948
2949} // Anonymous namespace
2950
2951ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
2952 ShaderEntries entries;
2953 for (const auto& cbuf : ir.GetConstantBuffers()) {
2954 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
2955 cbuf.first);
2956 }
2957 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
2958 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
2959 usage.is_written);
2960 }
2961 for (const auto& sampler : ir.GetSamplers()) {
2962 entries.samplers.emplace_back(sampler);
2963 }
2964 for (const auto& image : ir.GetImages()) {
2965 entries.images.emplace_back(image);
2966 }
2967 const auto clip_distances = ir.GetClipDistances();
2968 for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
2969 entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i;
2970 }
2971 for (const auto& buffer : entries.const_buffers) {
2972 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
2973 }
2974 entries.shader_length = ir.GetLength();
2975 return entries;
2976}
2977
2978std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
2979 ShaderType stage, std::string_view identifier,
2980 std::string_view suffix) {
2981 GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
2982 decompiler.Decompile();
2983 return decompiler.GetResult();
2984}
2985
2986} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
deleted file mode 100644
index 0397a000c..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ /dev/null
@@ -1,69 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <vector>
12#include "common/common_types.h"
13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/engines/shader_type.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace OpenGL {
19
20class Device;
21
22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using ImageEntry = VideoCommon::Shader::ImageEntry;
25
26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
27public:
28 explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
29 : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
30
31 u32 GetIndex() const {
32 return index;
33 }
34
35private:
36 u32 index = 0;
37};
38
39struct GlobalMemoryEntry {
40 constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
41 bool is_written_)
42 : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
43 is_written_} {}
44
45 u32 cbuf_index = 0;
46 u32 cbuf_offset = 0;
47 bool is_read = false;
48 bool is_written = false;
49};
50
51struct ShaderEntries {
52 std::vector<ConstBufferEntry> const_buffers;
53 std::vector<GlobalMemoryEntry> global_memory_entries;
54 std::vector<SamplerEntry> samplers;
55 std::vector<ImageEntry> images;
56 std::size_t shader_length{};
57 u32 clip_distances{};
58 u32 enabled_uniform_buffers{};
59};
60
61ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
62 Tegra::Engines::ShaderType stage);
63
64std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
65 const VideoCommon::Shader::Registry& registry,
66 Tegra::Engines::ShaderType stage, std::string_view identifier,
67 std::string_view suffix = {});
68
69} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
deleted file mode 100644
index 0deb86517..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "common/fs/file.h"
12#include "common/fs/fs.h"
13#include "common/fs/path_util.h"
14#include "common/logging/log.h"
15#include "common/scm_rev.h"
16#include "common/settings.h"
17#include "common/zstd_compression.h"
18#include "core/core.h"
19#include "core/hle/kernel/k_process.h"
20#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_shader_cache.h"
22#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
23
24namespace OpenGL {
25
26using Tegra::Engines::ShaderType;
27using VideoCommon::Shader::BindlessSamplerMap;
28using VideoCommon::Shader::BoundSamplerMap;
29using VideoCommon::Shader::KeyMap;
30using VideoCommon::Shader::SeparateSamplerKey;
31using ShaderCacheVersionHash = std::array<u8, 64>;
32
33struct ConstBufferKey {
34 u32 cbuf = 0;
35 u32 offset = 0;
36 u32 value = 0;
37};
38
39struct BoundSamplerEntry {
40 u32 offset = 0;
41 Tegra::Engines::SamplerDescriptor sampler;
42};
43
44struct SeparateSamplerEntry {
45 u32 cbuf1 = 0;
46 u32 cbuf2 = 0;
47 u32 offset1 = 0;
48 u32 offset2 = 0;
49 Tegra::Engines::SamplerDescriptor sampler;
50};
51
52struct BindlessSamplerEntry {
53 u32 cbuf = 0;
54 u32 offset = 0;
55 Tegra::Engines::SamplerDescriptor sampler;
56};
57
58namespace {
59
60constexpr u32 NativeVersion = 21;
61
62ShaderCacheVersionHash GetShaderCacheVersionHash() {
63 ShaderCacheVersionHash hash{};
64 const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
65 std::memcpy(hash.data(), Common::g_shader_cache_version, length);
66 return hash;
67}
68
69} // Anonymous namespace
70
71ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
72
73ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
74
75bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
76 if (!file.ReadObject(type)) {
77 return false;
78 }
79 u32 code_size;
80 u32 code_size_b;
81 if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
82 return false;
83 }
84 code.resize(code_size);
85 code_b.resize(code_size_b);
86 if (file.Read(code) != code_size) {
87 return false;
88 }
89 if (HasProgramA() && file.Read(code_b) != code_size_b) {
90 return false;
91 }
92
93 u8 is_texture_handler_size_known;
94 u32 texture_handler_size_value;
95 u32 num_keys;
96 u32 num_bound_samplers;
97 u32 num_separate_samplers;
98 u32 num_bindless_samplers;
99 if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
100 !file.ReadObject(is_texture_handler_size_known) ||
101 !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
102 !file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
103 !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
104 !file.ReadObject(num_bindless_samplers)) {
105 return false;
106 }
107 if (is_texture_handler_size_known) {
108 texture_handler_size = texture_handler_size_value;
109 }
110
111 std::vector<ConstBufferKey> flat_keys(num_keys);
112 std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
113 std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
114 std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
115 if (file.Read(flat_keys) != flat_keys.size() ||
116 file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
117 file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
118 file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
119 return false;
120 }
121 for (const auto& entry : flat_keys) {
122 keys.insert({{entry.cbuf, entry.offset}, entry.value});
123 }
124 for (const auto& entry : flat_bound_samplers) {
125 bound_samplers.emplace(entry.offset, entry.sampler);
126 }
127 for (const auto& entry : flat_separate_samplers) {
128 SeparateSamplerKey key;
129 key.buffers = {entry.cbuf1, entry.cbuf2};
130 key.offsets = {entry.offset1, entry.offset2};
131 separate_samplers.emplace(key, entry.sampler);
132 }
133 for (const auto& entry : flat_bindless_samplers) {
134 bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
135 }
136
137 return true;
138}
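
// On-disk layout of one transferable entry, as implied by the reads above (widths
// are the serialized sizes, not necessarily the in-memory ones):
//     u32 type | u32 code_size | u32 code_size_b | u64 code[code_size] |
//     u64 code_b[code_size_b, only when both programs exist] | unique_identifier |
//     bound_buffer | u8 has_texture_handler_size | u32 texture_handler_size |
//     graphics_info | compute_info | u32 num_keys | u32 num_bound_samplers |
//     u32 num_separate_samplers | u32 num_bindless_samplers | the four flat arrays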
139
140bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
141 if (!file.WriteObject(static_cast<u32>(type)) ||
142 !file.WriteObject(static_cast<u32>(code.size())) ||
143 !file.WriteObject(static_cast<u32>(code_b.size()))) {
144 return false;
145 }
146 if (file.Write(code) != code.size()) {
147 return false;
148 }
149 if (HasProgramA() && file.Write(code_b) != code_b.size()) {
150 return false;
151 }
152
153 if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
154 !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
155 !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
156 !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
157 !file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
158 !file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
159 !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
160 return false;
161 }
162
163 std::vector<ConstBufferKey> flat_keys;
164 flat_keys.reserve(keys.size());
165 for (const auto& [address, value] : keys) {
166 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
167 }
168
169 std::vector<BoundSamplerEntry> flat_bound_samplers;
170 flat_bound_samplers.reserve(bound_samplers.size());
171 for (const auto& [address, sampler] : bound_samplers) {
172 flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
173 }
174
175 std::vector<SeparateSamplerEntry> flat_separate_samplers;
176 flat_separate_samplers.reserve(separate_samplers.size());
177 for (const auto& [key, sampler] : separate_samplers) {
178 SeparateSamplerEntry entry;
179 std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
180 std::tie(entry.offset1, entry.offset2) = key.offsets;
181 entry.sampler = sampler;
182 flat_separate_samplers.push_back(entry);
183 }
184
185 std::vector<BindlessSamplerEntry> flat_bindless_samplers;
186 flat_bindless_samplers.reserve(bindless_samplers.size());
187 for (const auto& [address, sampler] : bindless_samplers) {
188 flat_bindless_samplers.push_back(
189 BindlessSamplerEntry{address.first, address.second, sampler});
190 }
191
192 return file.Write(flat_keys) == flat_keys.size() &&
193 file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
194 file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
195 file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
196}
197
198ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
199
200ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
201
202void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
203 title_id = title_id_;
204}
205
206std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
207 // Skip games without a title ID
208 const bool has_title_id = title_id != 0;
209 if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
210 return std::nullopt;
211 }
212
213 Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
214 Common::FS::FileType::BinaryFile};
215 if (!file.IsOpen()) {
216 LOG_INFO(Render_OpenGL, "No transferable shader cache found");
217 is_usable = true;
218 return std::nullopt;
219 }
220
221 u32 version{};
222 if (!file.ReadObject(version)) {
223 LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
224 return std::nullopt;
225 }
226
227 if (version < NativeVersion) {
228 LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
229 file.Close();
230 InvalidateTransferable();
231 is_usable = true;
232 return std::nullopt;
233 }
234 if (version > NativeVersion) {
235 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
236 "of the emulator, skipping");
237 return std::nullopt;
238 }
239
240 // Version is valid, load the shaders
241 std::vector<ShaderDiskCacheEntry> entries;
242 while (static_cast<u64>(file.Tell()) < file.GetSize()) {
243 ShaderDiskCacheEntry& entry = entries.emplace_back();
244 if (!entry.Load(file)) {
245 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
246 return std::nullopt;
247 }
248 }
249
250 is_usable = true;
251 return {std::move(entries)};
252}
253
254std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
255 if (!is_usable) {
256 return {};
257 }
258
259 Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
260 Common::FS::FileType::BinaryFile};
261 if (!file.IsOpen()) {
262 LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
263 return {};
264 }
265
266 if (const auto result = LoadPrecompiledFile(file)) {
267 return *result;
268 }
269
270 LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
271 file.Close();
272 InvalidatePrecompiled();
273 return {};
274}
275
276std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
277 Common::FS::IOFile& file) {
278 // Read compressed file from disk and decompress to virtual precompiled cache file
279 std::vector<u8> compressed(file.GetSize());
280 if (file.Read(compressed) != file.GetSize()) {
281 return std::nullopt;
282 }
283 const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
284 SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
285 precompiled_cache_virtual_file_offset = 0;
286
287 ShaderCacheVersionHash file_hash{};
288 if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
289 precompiled_cache_virtual_file_offset = 0;
290 return std::nullopt;
291 }
292 if (GetShaderCacheVersionHash() != file_hash) {
293 LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
294 precompiled_cache_virtual_file_offset = 0;
295 return std::nullopt;
296 }
297
298 std::vector<ShaderDiskCachePrecompiled> entries;
299 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
300 u32 binary_size;
301 auto& entry = entries.emplace_back();
302 if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
303 !LoadObjectFromPrecompiled(entry.binary_format) ||
304 !LoadObjectFromPrecompiled(binary_size)) {
305 return std::nullopt;
306 }
307
308 entry.binary.resize(binary_size);
309 if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
310 return std::nullopt;
311 }
312 }
313 return entries;
314}
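
// As implied by the code above, the precompiled file is a single ZSTD-compressed
// blob; decompressed, it begins with the 64-byte shader cache version hash,
// followed by { u64 unique_identifier, GLenum binary_format, u32 binary_size,
// u8 binary[binary_size] } records until end of file.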
315
316void ShaderDiskCacheOpenGL::InvalidateTransferable() {
317 if (!Common::FS::RemoveFile(GetTransferablePath())) {
318 LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
319 Common::FS::PathToUTF8String(GetTransferablePath()));
320 }
321 InvalidatePrecompiled();
322}
323
324void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
325 // Clear the virtual precompiled cache file
326 precompiled_cache_virtual_file.Resize(0);
327
328 if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
329 LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
330 Common::FS::PathToUTF8String(GetPrecompiledPath()));
331 }
332}
333
334void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
335 if (!is_usable) {
336 return;
337 }
338
339 const u64 id = entry.unique_identifier;
340 if (stored_transferable.contains(id)) {
341 // The shader already exists
342 return;
343 }
344
345 Common::FS::IOFile file = AppendTransferableFile();
346 if (!file.IsOpen()) {
347 return;
348 }
349 if (!entry.Save(file)) {
350 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
351 file.Close();
352 InvalidateTransferable();
353 return;
354 }
355
356 stored_transferable.insert(id);
357}
358
359void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
360 if (!is_usable) {
361 return;
362 }
363
364 // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
365 // when writing the dump. This should be done the moment I get access to write to the virtual
366 // file.
367 if (precompiled_cache_virtual_file.GetSize() == 0) {
368 SavePrecompiledHeaderToVirtualPrecompiledCache();
369 }
370
371 GLint binary_length;
372 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
373
374 GLenum binary_format;
375 std::vector<u8> binary(binary_length);
376 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
377
378 if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
379 !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
380 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
381 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
382 unique_identifier);
383 InvalidatePrecompiled();
384 }
385}
386
387Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
388 if (!EnsureDirectories()) {
389 return {};
390 }
391
392 const auto transferable_path{GetTransferablePath()};
393 const bool existed = Common::FS::Exists(transferable_path);
394
395 Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
396 Common::FS::FileType::BinaryFile};
397 if (!file.IsOpen()) {
398 LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
399 Common::FS::PathToUTF8String(transferable_path));
400 return {};
401 }
402 if (!existed || file.GetSize() == 0) {
403 // If the file didn't exist, write its version
404 if (!file.WriteObject(NativeVersion)) {
405 LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
406 Common::FS::PathToUTF8String(transferable_path));
407 return {};
408 }
409 }
410 return file;
411}
412
413void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
414 const auto hash{GetShaderCacheVersionHash()};
415 if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
416 LOG_ERROR(
417 Render_OpenGL,
418 "Failed to write precompiled cache version hash to virtual precompiled cache file");
419 }
420}
421
422void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
423 precompiled_cache_virtual_file_offset = 0;
424 const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
425 const std::vector<u8> compressed =
426 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
427
428 const auto precompiled_path = GetPrecompiledPath();
429 Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
430 Common::FS::FileType::BinaryFile};
431
432 if (!file.IsOpen()) {
433 LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
434 Common::FS::PathToUTF8String(precompiled_path));
435 return;
436 }
437 if (file.Write(compressed) != compressed.size()) {
438 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
439 Common::FS::PathToUTF8String(precompiled_path));
440 }
441}
442
443bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
444 const auto CreateDir = [](const std::filesystem::path& dir) {
445 if (!Common::FS::CreateDir(dir)) {
446 LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
447 Common::FS::PathToUTF8String(dir));
448 return false;
449 }
450 return true;
451 };
452
453 return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
454 CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
455 CreateDir(GetPrecompiledDir());
456}
457
458std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
459 return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
460}
461
462std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
463 return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
464}
465
466std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
467 return GetBaseDir() / "transferable";
468}
469
470std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
471 return GetBaseDir() / "precompiled";
472}
473
474std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
475 return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
476}
477
478std::string ShaderDiskCacheOpenGL::GetTitleID() const {
479 return fmt::format("{:016X}", title_id);
480}
481
482} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
deleted file mode 100644
index f8bc23868..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ /dev/null
@@ -1,176 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <filesystem>
8#include <optional>
9#include <string>
10#include <tuple>
11#include <type_traits>
12#include <unordered_map>
13#include <unordered_set>
14#include <utility>
15#include <vector>
16
17#include <glad/glad.h>
18
19#include "common/assert.h"
20#include "common/common_types.h"
21#include "core/file_sys/vfs_vector.h"
22#include "video_core/engines/shader_type.h"
23#include "video_core/shader/registry.h"
24
25namespace Common::FS {
26class IOFile;
27}
28
29namespace OpenGL {
30
31using ProgramCode = std::vector<u64>;
32
33/// Describes a shader and how it's used by the guest GPU
34struct ShaderDiskCacheEntry {
35 ShaderDiskCacheEntry();
36 ~ShaderDiskCacheEntry();
37
38 bool Load(Common::FS::IOFile& file);
39
40 bool Save(Common::FS::IOFile& file) const;
41
42 bool HasProgramA() const {
43 return !code.empty() && !code_b.empty();
44 }
45
46 Tegra::Engines::ShaderType type{};
47 ProgramCode code;
48 ProgramCode code_b;
49
50 u64 unique_identifier = 0;
51 std::optional<u32> texture_handler_size;
52 u32 bound_buffer = 0;
53 VideoCommon::Shader::GraphicsInfo graphics_info;
54 VideoCommon::Shader::ComputeInfo compute_info;
55 VideoCommon::Shader::KeyMap keys;
56 VideoCommon::Shader::BoundSamplerMap bound_samplers;
57 VideoCommon::Shader::SeparateSamplerMap separate_samplers;
58 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
59};
60
61/// Contains an OpenGL dumped binary program
62struct ShaderDiskCachePrecompiled {
63 u64 unique_identifier = 0;
64 GLenum binary_format = 0;
65 std::vector<u8> binary;
66};
67
68class ShaderDiskCacheOpenGL {
69public:
70 explicit ShaderDiskCacheOpenGL();
71 ~ShaderDiskCacheOpenGL();
72
73 /// Binds a title ID for all future operations.
74 void BindTitleID(u64 title_id);
75
 76    /// Loads the transferable cache. If the file has an old version or loading fails, it deletes the file.
77 std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
78
79 /// Loads current game's precompiled cache. Invalidates on failure.
80 std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
81
82 /// Removes the transferable (and precompiled) cache file.
83 void InvalidateTransferable();
84
 85    /// Removes the precompiled cache file and clears the virtual precompiled cache file.
86 void InvalidatePrecompiled();
87
88 /// Saves a raw dump to the transferable file. Checks for collisions.
89 void SaveEntry(const ShaderDiskCacheEntry& entry);
90
91 /// Saves a dump entry to the precompiled file. Does not check for collisions.
92 void SavePrecompiled(u64 unique_identifier, GLuint program);
93
 94    /// Serializes the virtual precompiled shader cache file to the real file
95 void SaveVirtualPrecompiledFile();
96
97private:
 98    /// Loads the precompiled cache file. Returns empty on failure.
99 std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
100 Common::FS::IOFile& file);
101
 102    /// Opens the current game's transferable file and writes its header if it doesn't exist
103 Common::FS::IOFile AppendTransferableFile() const;
104
 105    /// Saves the precompiled header to the virtual precompiled cache file
106 void SavePrecompiledHeaderToVirtualPrecompiledCache();
107
 108    /// Creates the shader disk cache directories. Returns true on success.
109 bool EnsureDirectories() const;
110
111 /// Gets current game's transferable file path
112 std::filesystem::path GetTransferablePath() const;
113
114 /// Gets current game's precompiled file path
115 std::filesystem::path GetPrecompiledPath() const;
116
117 /// Get user's transferable directory path
118 std::filesystem::path GetTransferableDir() const;
119
120 /// Get user's precompiled directory path
121 std::filesystem::path GetPrecompiledDir() const;
122
123 /// Get user's shader directory path
124 std::filesystem::path GetBaseDir() const;
125
126 /// Get current game's title id
127 std::string GetTitleID() const;
128
129 template <typename T>
130 bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
131 const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
132 data, length, precompiled_cache_virtual_file_offset);
133 precompiled_cache_virtual_file_offset += write_length;
134 return write_length == sizeof(T) * length;
135 }
136
137 template <typename T>
138 bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
139 const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
140 data, length, precompiled_cache_virtual_file_offset);
141 precompiled_cache_virtual_file_offset += read_length;
142 return read_length == sizeof(T) * length;
143 }
144
145 template <typename T>
146 bool SaveObjectToPrecompiled(const T& object) {
147 return SaveArrayToPrecompiled(&object, 1);
148 }
149
150 bool SaveObjectToPrecompiled(bool object) {
151 const auto value = static_cast<u8>(object);
152 return SaveArrayToPrecompiled(&value, 1);
153 }
154
155 template <typename T>
156 bool LoadObjectFromPrecompiled(T& object) {
157 return LoadArrayFromPrecompiled(&object, 1);
158 }
159
 160    // Stores the whole precompiled cache which will be read from or saved to the precompiled
 161    // cache file
162 FileSys::VectorVfsFile precompiled_cache_virtual_file;
163 // Stores the current offset of the precompiled cache file for IO purposes
164 std::size_t precompiled_cache_virtual_file_offset = 0;
165
166 // Stored transferable shaders
167 std::unordered_set<u64> stored_transferable;
168
169 /// Title ID to operate on
170 u64 title_id = 0;
171
 172    // Whether the cache has been loaded at boot
173 bool is_usable = false;
174};
175
176} // namespace OpenGL
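The SaveArrayToPrecompiled/LoadArrayFromPrecompiled templates in the deleted header are cursor-style serialization: every call reads or writes raw bytes at a shared offset, advances it by the amount actually transferred, and signals truncation through the length comparison. A self-contained sketch of the same idea, with a plain std::vector standing in for FileSys::VectorVfsFile (all names hypothetical):

#include <cstring>
#include <type_traits>
#include <vector>

struct ByteCursor {
    std::vector<unsigned char> bytes;
    std::size_t offset = 0;

    template <typename T>
    bool Write(const T& object) {
        static_assert(std::is_trivially_copyable_v<T>);
        // Grow the buffer if needed, copy raw object bytes at the cursor, advance.
        if (offset + sizeof(T) > bytes.size()) {
            bytes.resize(offset + sizeof(T));
        }
        std::memcpy(bytes.data() + offset, &object, sizeof(T));
        offset += sizeof(T);
        return true;
    }

    template <typename T>
    bool Read(T& object) {
        static_assert(std::is_trivially_copyable_v<T>);
        if (offset + sizeof(T) > bytes.size()) {
            return false; // Short read, the failure the length check above reports.
        }
        std::memcpy(&object, bytes.data() + offset, sizeof(T));
        offset += sizeof(T);
        return true;
    }
};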
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b7f5b8bc2..6c0d5c7f4 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
323 cmdbuf.SetScissor(0, scissor); 323 cmdbuf.SetScissor(0, scissor);
324 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); 324 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
325} 325}
326
327} // Anonymous namespace 326} // Anonymous namespace
328 327
329BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, 328BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 3a48219b7..7a3660496 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -8,146 +8,14 @@
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 8#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 9#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h" 10#include "video_core/renderer_vulkan/vk_scheduler.h"
11#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
12#include "video_core/renderer_vulkan/vk_update_descriptor.h" 11#include "video_core/renderer_vulkan/vk_update_descriptor.h"
13#include "video_core/vulkan_common/vulkan_device.h" 12#include "video_core/vulkan_common/vulkan_device.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
15 14
16namespace Vulkan { 15namespace Vulkan {
17 16
18VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, 17ComputePipeline::ComputePipeline() = default;
19 VKDescriptorPool& descriptor_pool_,
20 VKUpdateDescriptorQueue& update_descriptor_queue_,
21 const SPIRVShader& shader_)
22 : device{device_}, scheduler{scheduler_}, entries{shader_.entries},
23 descriptor_set_layout{CreateDescriptorSetLayout()},
24 descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
25 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
26 descriptor_template{CreateDescriptorUpdateTemplate()},
27 shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {}
28 18
29VKComputePipeline::~VKComputePipeline() = default; 19ComputePipeline::~ComputePipeline() = default;
30
31VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
32 if (!descriptor_template) {
33 return {};
34 }
35 const VkDescriptorSet set = descriptor_allocator.Commit();
36 update_descriptor_queue.Send(*descriptor_template, set);
37 return set;
38}
39
40vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
41 std::vector<VkDescriptorSetLayoutBinding> bindings;
42 u32 binding = 0;
43 const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
44 // TODO(Rodrigo): Maybe make individual bindings here?
45 for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
46 bindings.push_back({
47 .binding = binding++,
48 .descriptorType = descriptor_type,
49 .descriptorCount = 1,
50 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
51 .pImmutableSamplers = nullptr,
52 });
53 }
54 };
55 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
56 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
57 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
58 add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
59 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
60 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
61
62 return device.GetLogical().CreateDescriptorSetLayout({
63 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
64 .pNext = nullptr,
65 .flags = 0,
66 .bindingCount = static_cast<u32>(bindings.size()),
67 .pBindings = bindings.data(),
68 });
69}
70
71vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
72 return device.GetLogical().CreatePipelineLayout({
73 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
74 .pNext = nullptr,
75 .flags = 0,
76 .setLayoutCount = 1,
77 .pSetLayouts = descriptor_set_layout.address(),
78 .pushConstantRangeCount = 0,
79 .pPushConstantRanges = nullptr,
80 });
81}
82
83vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
84 std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
85 u32 binding = 0;
86 u32 offset = 0;
87 FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
88 if (template_entries.empty()) {
89 // If the shader doesn't use descriptor sets, skip template creation.
90 return {};
91 }
92
93 return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
94 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
95 .pNext = nullptr,
96 .flags = 0,
97 .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
98 .pDescriptorUpdateEntries = template_entries.data(),
99 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
100 .descriptorSetLayout = *descriptor_set_layout,
101 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
102 .pipelineLayout = *layout,
103 .set = DESCRIPTOR_SET,
104 });
105}
106
107vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
108 device.SaveShader(code);
109
110 return device.GetLogical().CreateShaderModule({
111 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
112 .pNext = nullptr,
113 .flags = 0,
114 .codeSize = code.size() * sizeof(u32),
115 .pCode = code.data(),
116 });
117}
118
119vk::Pipeline VKComputePipeline::CreatePipeline() const {
120
121 VkComputePipelineCreateInfo ci{
122 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
123 .pNext = nullptr,
124 .flags = 0,
125 .stage =
126 {
127 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
128 .pNext = nullptr,
129 .flags = 0,
130 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
131 .module = *shader_module,
132 .pName = "main",
133 .pSpecializationInfo = nullptr,
134 },
135 .layout = *layout,
136 .basePipelineHandle = nullptr,
137 .basePipelineIndex = 0,
138 };
139
140 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
141 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
142 .pNext = nullptr,
143 .requiredSubgroupSize = GuestWarpSize,
144 };
145
146 if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) {
147 ci.stage.pNext = &subgroup_size_ci;
148 }
149
150 return device.GetLogical().CreateComputePipeline(ci);
151}
152 20
153} // namespace Vulkan 21} // namespace Vulkan
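The removed VKComputePipeline built its descriptor set layout by emitting one binding per shader resource, numbered consecutively in a fixed resource order (uniform buffers, then storage buffers, and so on). For reference, a sketch of the equivalent against the raw Vulkan API instead of yuzu's vk:: wrappers; the function name and resource counts are assumptions, only the two buffer kinds are shown:

#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

using u32 = std::uint32_t;

VkDescriptorSetLayout MakeComputeSetLayout(VkDevice device, std::size_t num_uniform_buffers,
                                           std::size_t num_storage_buffers) {
    std::vector<VkDescriptorSetLayoutBinding> bindings;
    u32 binding = 0;
    const auto add_bindings = [&](VkDescriptorType type, std::size_t count) {
        // Bindings are numbered consecutively in a fixed resource order, exactly
        // as the removed CreateDescriptorSetLayout did.
        for (std::size_t i = 0; i < count; ++i) {
            bindings.push_back({
                .binding = binding++,
                .descriptorType = type,
                .descriptorCount = 1,
                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
                .pImmutableSamplers = nullptr,
            });
        }
    };
    add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_uniform_buffers);
    add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_storage_buffers);

    const VkDescriptorSetLayoutCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .bindingCount = static_cast<u32>(bindings.size()),
        .pBindings = bindings.data(),
    };
    VkDescriptorSetLayout set_layout = VK_NULL_HANDLE;
    vkCreateDescriptorSetLayout(device, &ci, nullptr, &set_layout);
    return set_layout;
}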
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 7e16575ac..433d8bb3d 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -6,7 +6,6 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 8#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h" 9#include "video_core/vulkan_common/vulkan_wrapper.h"
11 10
12namespace Vulkan { 11namespace Vulkan {
@@ -15,50 +14,10 @@ class Device;
15class VKScheduler; 14class VKScheduler;
16class VKUpdateDescriptorQueue; 15class VKUpdateDescriptorQueue;
17 16
18class VKComputePipeline final { 17class ComputePipeline {
19public: 18public:
20 explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, 19 explicit ComputePipeline();
21 VKDescriptorPool& descriptor_pool_, 20 ~ComputePipeline();
22 VKUpdateDescriptorQueue& update_descriptor_queue_,
23 const SPIRVShader& shader_);
24 ~VKComputePipeline();
25
26 VkDescriptorSet CommitDescriptorSet();
27
28 VkPipeline GetHandle() const {
29 return *pipeline;
30 }
31
32 VkPipelineLayout GetLayout() const {
33 return *layout;
34 }
35
36 const ShaderEntries& GetEntries() const {
37 return entries;
38 }
39
40private:
41 vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
42
43 vk::PipelineLayout CreatePipelineLayout() const;
44
45 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
46
47 vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
48
49 vk::Pipeline CreatePipeline() const;
50
51 const Device& device;
52 VKScheduler& scheduler;
53 ShaderEntries entries;
54
55 vk::DescriptorSetLayout descriptor_set_layout;
56 DescriptorAllocator descriptor_allocator;
57 VKUpdateDescriptorQueue& update_descriptor_queue;
58 vk::PipelineLayout layout;
59 vk::DescriptorUpdateTemplateKHR descriptor_template;
60 vk::ShaderModule shader_module;
61 vk::Pipeline pipeline;
62}; 21};
63 22
64} // namespace Vulkan 23} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
deleted file mode 100644
index fc6dd83eb..000000000
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ /dev/null
@@ -1,484 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstring>
8#include <vector>
9
10#include "common/common_types.h"
11#include "common/microprofile.h"
12#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
13#include "video_core/renderer_vulkan/maxwell_to_vk.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
15#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
16#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h"
19#include "video_core/vulkan_common/vulkan_device.h"
20#include "video_core/vulkan_common/vulkan_wrapper.h"
21
22namespace Vulkan {
23
24MICROPROFILE_DECLARE(Vulkan_PipelineCache);
25
26namespace {
27
28template <class StencilFace>
29VkStencilOpState GetStencilFaceState(const StencilFace& face) {
30 return {
31 .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()),
32 .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()),
33 .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()),
34 .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()),
35 .compareMask = 0,
36 .writeMask = 0,
37 .reference = 0,
38 };
39}
40
41bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
42 static constexpr std::array unsupported_topologies = {
43 VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
44 VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
45 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
46 VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
47 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
48 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST};
49 return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
50 topology) == std::end(unsupported_topologies);
51}
52
53VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
54 union Swizzle {
55 u32 raw;
56 BitField<0, 3, Maxwell::ViewportSwizzle> x;
57 BitField<4, 3, Maxwell::ViewportSwizzle> y;
58 BitField<8, 3, Maxwell::ViewportSwizzle> z;
59 BitField<12, 3, Maxwell::ViewportSwizzle> w;
60 };
61 const Swizzle unpacked{swizzle};
62
63 return {
64 .x = MaxwellToVK::ViewportSwizzle(unpacked.x),
65 .y = MaxwellToVK::ViewportSwizzle(unpacked.y),
66 .z = MaxwellToVK::ViewportSwizzle(unpacked.z),
67 .w = MaxwellToVK::ViewportSwizzle(unpacked.w),
68 };
69}
70
71VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
72 switch (msaa_mode) {
73 case Tegra::Texture::MsaaMode::Msaa1x1:
74 return VK_SAMPLE_COUNT_1_BIT;
75 case Tegra::Texture::MsaaMode::Msaa2x1:
76 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
77 return VK_SAMPLE_COUNT_2_BIT;
78 case Tegra::Texture::MsaaMode::Msaa2x2:
79 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
80 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
81 return VK_SAMPLE_COUNT_4_BIT;
82 case Tegra::Texture::MsaaMode::Msaa4x2:
83 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
84 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
85 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
86 return VK_SAMPLE_COUNT_8_BIT;
87 case Tegra::Texture::MsaaMode::Msaa4x4:
88 return VK_SAMPLE_COUNT_16_BIT;
89 default:
90 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
91 return VK_SAMPLE_COUNT_1_BIT;
92 }
93}
94
95} // Anonymous namespace
96
97VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
98 VKDescriptorPool& descriptor_pool_,
99 VKUpdateDescriptorQueue& update_descriptor_queue_,
100 const GraphicsPipelineCacheKey& key,
101 vk::Span<VkDescriptorSetLayoutBinding> bindings,
102 const SPIRVProgram& program, u32 num_color_buffers)
103 : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
104 descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
105 descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
106 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
107 descriptor_template{CreateDescriptorUpdateTemplate(program)},
108 modules(CreateShaderModules(program)),
109 pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
110
111VKGraphicsPipeline::~VKGraphicsPipeline() = default;
112
113VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
114 if (!descriptor_template) {
115 return {};
116 }
117 const VkDescriptorSet set = descriptor_allocator.Commit();
118 update_descriptor_queue.Send(*descriptor_template, set);
119 return set;
120}
121
122vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
123 vk::Span<VkDescriptorSetLayoutBinding> bindings) const {
124 const VkDescriptorSetLayoutCreateInfo ci{
125 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
126 .pNext = nullptr,
127 .flags = 0,
128 .bindingCount = bindings.size(),
129 .pBindings = bindings.data(),
130 };
131 return device.GetLogical().CreateDescriptorSetLayout(ci);
132}
133
134vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
135 const VkPipelineLayoutCreateInfo ci{
136 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
137 .pNext = nullptr,
138 .flags = 0,
139 .setLayoutCount = 1,
140 .pSetLayouts = descriptor_set_layout.address(),
141 .pushConstantRangeCount = 0,
142 .pPushConstantRanges = nullptr,
143 };
144 return device.GetLogical().CreatePipelineLayout(ci);
145}
146
147vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
148 const SPIRVProgram& program) const {
149 std::vector<VkDescriptorUpdateTemplateEntry> template_entries;
150 u32 binding = 0;
151 u32 offset = 0;
152 for (const auto& stage : program) {
153 if (stage) {
154 FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries);
155 }
156 }
157 if (template_entries.empty()) {
158 // If the shader doesn't use descriptor sets, skip template creation.
159 return {};
160 }
161
162 const VkDescriptorUpdateTemplateCreateInfoKHR ci{
163 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
164 .pNext = nullptr,
165 .flags = 0,
166 .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
167 .pDescriptorUpdateEntries = template_entries.data(),
168 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
169 .descriptorSetLayout = *descriptor_set_layout,
170 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
171 .pipelineLayout = *layout,
172 .set = DESCRIPTOR_SET,
173 };
174 return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
175}
176
177std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
178 const SPIRVProgram& program) const {
179 VkShaderModuleCreateInfo ci{
180 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
181 .pNext = nullptr,
182 .flags = 0,
183 .codeSize = 0,
184 .pCode = nullptr,
185 };
186
187 std::vector<vk::ShaderModule> shader_modules;
188 shader_modules.reserve(Maxwell::MaxShaderStage);
189 for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
190 const auto& stage = program[i];
191 if (!stage) {
192 continue;
193 }
194
195 device.SaveShader(stage->code);
196
197 ci.codeSize = stage->code.size() * sizeof(u32);
198 ci.pCode = stage->code.data();
199 shader_modules.push_back(device.GetLogical().CreateShaderModule(ci));
200 }
201 return shader_modules;
202}
203
204vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
205 VkRenderPass renderpass,
206 u32 num_color_buffers) const {
207 const auto& state = cache_key.fixed_state;
208 const auto& viewport_swizzles = state.viewport_swizzles;
209
210 FixedPipelineState::DynamicState dynamic;
211 if (device.IsExtExtendedDynamicStateSupported()) {
 212        // Insert dummy values; as long as they are valid, they don't matter because extended
 213        // dynamic state is ignored
214 dynamic.raw1 = 0;
215 dynamic.raw2 = 0;
216 dynamic.vertex_strides.fill(0);
217 } else {
218 dynamic = state.dynamic_state;
219 }
220
221 std::vector<VkVertexInputBindingDescription> vertex_bindings;
222 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
223 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
224 const bool instanced = state.binding_divisors[index] != 0;
225 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
226 vertex_bindings.push_back({
227 .binding = static_cast<u32>(index),
228 .stride = dynamic.vertex_strides[index],
229 .inputRate = rate,
230 });
231 if (instanced) {
232 vertex_binding_divisors.push_back({
233 .binding = static_cast<u32>(index),
234 .divisor = state.binding_divisors[index],
235 });
236 }
237 }
238
239 std::vector<VkVertexInputAttributeDescription> vertex_attributes;
240 const auto& input_attributes = program[0]->entries.attributes;
241 for (std::size_t index = 0; index < state.attributes.size(); ++index) {
242 const auto& attribute = state.attributes[index];
243 if (!attribute.enabled) {
244 continue;
245 }
246 if (!input_attributes.contains(static_cast<u32>(index))) {
247 // Skip attributes not used by the vertex shaders.
248 continue;
249 }
250 vertex_attributes.push_back({
251 .location = static_cast<u32>(index),
252 .binding = attribute.buffer,
253 .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
254 .offset = attribute.offset,
255 });
256 }
257
258 VkPipelineVertexInputStateCreateInfo vertex_input_ci{
259 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
260 .pNext = nullptr,
261 .flags = 0,
262 .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
263 .pVertexBindingDescriptions = vertex_bindings.data(),
264 .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
265 .pVertexAttributeDescriptions = vertex_attributes.data(),
266 };
267
268 const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
269 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
270 .pNext = nullptr,
271 .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()),
272 .pVertexBindingDivisors = vertex_binding_divisors.data(),
273 };
274 if (!vertex_binding_divisors.empty()) {
275 vertex_input_ci.pNext = &input_divisor_ci;
276 }
277
278 const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
279 const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
280 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
281 .pNext = nullptr,
282 .flags = 0,
 283        .topology = input_assembly_topology,
284 .primitiveRestartEnable = state.primitive_restart_enable != 0 &&
285 SupportsPrimitiveRestart(input_assembly_topology),
286 };
287
288 const VkPipelineTessellationStateCreateInfo tessellation_ci{
289 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
290 .pNext = nullptr,
291 .flags = 0,
292 .patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
293 };
294
295 VkPipelineViewportStateCreateInfo viewport_ci{
296 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
297 .pNext = nullptr,
298 .flags = 0,
299 .viewportCount = Maxwell::NumViewports,
300 .pViewports = nullptr,
301 .scissorCount = Maxwell::NumViewports,
302 .pScissors = nullptr,
303 };
304
305 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
306 std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
307 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
308 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
309 .pNext = nullptr,
310 .flags = 0,
311 .viewportCount = Maxwell::NumViewports,
312 .pViewportSwizzles = swizzles.data(),
313 };
314 if (device.IsNvViewportSwizzleSupported()) {
315 viewport_ci.pNext = &swizzle_ci;
316 }
317
318 const VkPipelineRasterizationStateCreateInfo rasterization_ci{
319 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
320 .pNext = nullptr,
321 .flags = 0,
322 .depthClampEnable =
323 static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
324 .rasterizerDiscardEnable =
325 static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
326 .polygonMode = VK_POLYGON_MODE_FILL,
327 .cullMode = static_cast<VkCullModeFlags>(
328 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
329 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
330 .depthBiasEnable = state.depth_bias_enable,
331 .depthBiasConstantFactor = 0.0f,
332 .depthBiasClamp = 0.0f,
333 .depthBiasSlopeFactor = 0.0f,
334 .lineWidth = 1.0f,
335 };
336
337 const VkPipelineMultisampleStateCreateInfo multisample_ci{
338 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
339 .pNext = nullptr,
340 .flags = 0,
341 .rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
342 .sampleShadingEnable = VK_FALSE,
343 .minSampleShading = 0.0f,
344 .pSampleMask = nullptr,
345 .alphaToCoverageEnable = VK_FALSE,
346 .alphaToOneEnable = VK_FALSE,
347 };
348
349 const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
350 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
351 .pNext = nullptr,
352 .flags = 0,
353 .depthTestEnable = dynamic.depth_test_enable,
354 .depthWriteEnable = dynamic.depth_write_enable,
355 .depthCompareOp = dynamic.depth_test_enable
356 ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
357 : VK_COMPARE_OP_ALWAYS,
358 .depthBoundsTestEnable = dynamic.depth_bounds_enable,
359 .stencilTestEnable = dynamic.stencil_enable,
360 .front = GetStencilFaceState(dynamic.front),
361 .back = GetStencilFaceState(dynamic.back),
362 .minDepthBounds = 0.0f,
363 .maxDepthBounds = 0.0f,
364 };
365
366 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
367 for (std::size_t index = 0; index < num_color_buffers; ++index) {
368 static constexpr std::array COMPONENT_TABLE{
369 VK_COLOR_COMPONENT_R_BIT,
370 VK_COLOR_COMPONENT_G_BIT,
371 VK_COLOR_COMPONENT_B_BIT,
372 VK_COLOR_COMPONENT_A_BIT,
373 };
374 const auto& blend = state.attachments[index];
375
376 VkColorComponentFlags color_components = 0;
377 for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) {
378 if (blend.Mask()[i]) {
379 color_components |= COMPONENT_TABLE[i];
380 }
381 }
382
383 cb_attachments[index] = {
384 .blendEnable = blend.enable != 0,
385 .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
386 .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
387 .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
388 .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
389 .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
390 .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
391 .colorWriteMask = color_components,
392 };
393 }
394
395 const VkPipelineColorBlendStateCreateInfo color_blend_ci{
396 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
397 .pNext = nullptr,
398 .flags = 0,
399 .logicOpEnable = VK_FALSE,
400 .logicOp = VK_LOGIC_OP_COPY,
401 .attachmentCount = num_color_buffers,
402 .pAttachments = cb_attachments.data(),
403 .blendConstants = {},
404 };
405
406 std::vector dynamic_states{
407 VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
408 VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
409 VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
410 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
411 };
412 if (device.IsExtExtendedDynamicStateSupported()) {
413 static constexpr std::array extended{
414 VK_DYNAMIC_STATE_CULL_MODE_EXT,
415 VK_DYNAMIC_STATE_FRONT_FACE_EXT,
416 VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
417 VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
418 VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
419 VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
420 VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
421 VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
422 VK_DYNAMIC_STATE_STENCIL_OP_EXT,
423 };
424 dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
425 }
426
427 const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
428 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
429 .pNext = nullptr,
430 .flags = 0,
431 .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
432 .pDynamicStates = dynamic_states.data(),
433 };
434
435 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
436 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
437 .pNext = nullptr,
438 .requiredSubgroupSize = GuestWarpSize,
439 };
440
441 std::vector<VkPipelineShaderStageCreateInfo> shader_stages;
442 std::size_t module_index = 0;
443 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
444 if (!program[stage]) {
445 continue;
446 }
447
448 VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back();
449 stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
450 stage_ci.pNext = nullptr;
451 stage_ci.flags = 0;
452 stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage));
453 stage_ci.module = *modules[module_index++];
454 stage_ci.pName = "main";
455 stage_ci.pSpecializationInfo = nullptr;
456
457 if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
458 stage_ci.pNext = &subgroup_size_ci;
459 }
460 }
461 return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
462 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
463 .pNext = nullptr,
464 .flags = 0,
465 .stageCount = static_cast<u32>(shader_stages.size()),
466 .pStages = shader_stages.data(),
467 .pVertexInputState = &vertex_input_ci,
468 .pInputAssemblyState = &input_assembly_ci,
469 .pTessellationState = &tessellation_ci,
470 .pViewportState = &viewport_ci,
471 .pRasterizationState = &rasterization_ci,
472 .pMultisampleState = &multisample_ci,
473 .pDepthStencilState = &depth_stencil_ci,
474 .pColorBlendState = &color_blend_ci,
475 .pDynamicState = &dynamic_state_ci,
476 .layout = *layout,
477 .renderPass = renderpass,
478 .subpass = 0,
479 .basePipelineHandle = nullptr,
480 .basePipelineIndex = 0,
481 });
482}
483
484} // namespace Vulkan
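UnpackViewportSwizzle in the deleted file decodes a 16-bit packed swizzle through yuzu's BitField: three bits per component at four-bit strides. The same extraction with plain shifts, as a sketch:

#include <cstdint>

using u16 = std::uint16_t;
using u32 = std::uint32_t;

// x: bits 0-2, y: bits 4-6, z: bits 8-10, w: bits 12-14; the fourth bit of
// each nibble is unused.
constexpr u32 SwizzleX(u16 packed) { return (packed >> 0) & 7; }
constexpr u32 SwizzleY(u16 packed) { return (packed >> 4) & 7; }
constexpr u32 SwizzleZ(u16 packed) { return (packed >> 8) & 7; }
constexpr u32 SwizzleW(u16 packed) { return (packed >> 12) & 7; }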
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
deleted file mode 100644
index 8b6a98fe0..000000000
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ /dev/null
@@ -1,103 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <vector>
10
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
15#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
16#include "video_core/vulkan_common/vulkan_wrapper.h"
17
18namespace Vulkan {
19
20using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21
22struct GraphicsPipelineCacheKey {
23 VkRenderPass renderpass;
24 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
25 FixedPipelineState fixed_state;
26
27 std::size_t Hash() const noexcept;
28
29 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
30
31 bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
32 return !operator==(rhs);
33 }
34
35 std::size_t Size() const noexcept {
36 return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
37 }
38};
39static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
40static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
41static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
42
43class Device;
44class VKDescriptorPool;
45class VKScheduler;
46class VKUpdateDescriptorQueue;
47
48using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
49
50class VKGraphicsPipeline final {
51public:
52 explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
53 VKDescriptorPool& descriptor_pool,
54 VKUpdateDescriptorQueue& update_descriptor_queue_,
55 const GraphicsPipelineCacheKey& key,
56 vk::Span<VkDescriptorSetLayoutBinding> bindings,
57 const SPIRVProgram& program, u32 num_color_buffers);
58 ~VKGraphicsPipeline();
59
60 VkDescriptorSet CommitDescriptorSet();
61
62 VkPipeline GetHandle() const {
63 return *pipeline;
64 }
65
66 VkPipelineLayout GetLayout() const {
67 return *layout;
68 }
69
70 GraphicsPipelineCacheKey GetCacheKey() const {
71 return cache_key;
72 }
73
74private:
75 vk::DescriptorSetLayout CreateDescriptorSetLayout(
76 vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
77
78 vk::PipelineLayout CreatePipelineLayout() const;
79
80 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
81 const SPIRVProgram& program) const;
82
83 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
84
85 vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
86 u32 num_color_buffers) const;
87
88 const Device& device;
89 VKScheduler& scheduler;
90 const GraphicsPipelineCacheKey cache_key;
91 const u64 hash;
92
93 vk::DescriptorSetLayout descriptor_set_layout;
94 DescriptorAllocator descriptor_allocator;
95 VKUpdateDescriptorQueue& update_descriptor_queue;
96 vk::PipelineLayout layout;
97 vk::DescriptorUpdateTemplateKHR descriptor_template;
98 std::vector<vk::ShaderModule> modules;
99
100 vk::Pipeline pipeline;
101};
102
103} // namespace Vulkan
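The three static_asserts on GraphicsPipelineCacheKey are what made the CityHash64/std::memcmp treatment in vk_pipeline_cache.cpp sound: hashing or comparing a key by its raw bytes is only valid when the type has no padding, so every byte participates in the value. A minimal sketch of the same pattern (Key is a stand-in type):

#include <cstdint>
#include <cstring>
#include <type_traits>

using u32 = std::uint32_t; // as in common/common_types.h

struct Key {
    u32 a;
    u32 b;
};
// No padding bytes, so comparing or hashing the raw bytes cannot read
// indeterminate values or miss logically-equal keys.
static_assert(std::has_unique_object_representations_v<Key>);
static_assert(std::is_trivially_copyable_v<Key>);

bool Equal(const Key& lhs, const Key& rhs) {
    return std::memcmp(&lhs, &rhs, sizeof(Key)) == 0;
}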
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8991505ca..7d0ba1180 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -19,49 +19,27 @@
19#include "video_core/renderer_vulkan/maxwell_to_vk.h" 19#include "video_core/renderer_vulkan/maxwell_to_vk.h"
20#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 20#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
21#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 21#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 22#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h" 23#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_scheduler.h" 24#include "video_core/renderer_vulkan/vk_scheduler.h"
26#include "video_core/renderer_vulkan/vk_update_descriptor.h" 25#include "video_core/renderer_vulkan/vk_update_descriptor.h"
27#include "video_core/shader/compiler_settings.h"
28#include "video_core/shader/memory_util.h"
29#include "video_core/shader_cache.h" 26#include "video_core/shader_cache.h"
30#include "video_core/shader_notify.h" 27#include "video_core/shader_notify.h"
31#include "video_core/vulkan_common/vulkan_device.h" 28#include "video_core/vulkan_common/vulkan_device.h"
32#include "video_core/vulkan_common/vulkan_wrapper.h" 29#include "video_core/vulkan_common/vulkan_wrapper.h"
33 30
34namespace Vulkan { 31namespace Vulkan {
35
36MICROPROFILE_DECLARE(Vulkan_PipelineCache); 32MICROPROFILE_DECLARE(Vulkan_PipelineCache);
37 33
38using Tegra::Engines::ShaderType; 34using Tegra::Engines::ShaderType;
39using VideoCommon::Shader::GetShaderAddress;
40using VideoCommon::Shader::GetShaderCode;
41using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
42using VideoCommon::Shader::ProgramCode;
43using VideoCommon::Shader::STAGE_MAIN_OFFSET;
44 35
45namespace { 36namespace {
46 37size_t StageFromProgram(size_t program) {
47constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
48constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
49constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
50constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
51constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
53
54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
55 .depth = VideoCommon::Shader::CompileDepth::FullDecompile,
56 .disable_else_derivation = true,
57};
58
59constexpr std::size_t GetStageFromProgram(std::size_t program) {
60 return program == 0 ? 0 : program - 1; 38 return program == 0 ? 0 : program - 1;
61} 39}
62 40
63constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { 41ShaderType StageFromProgram(Maxwell::ShaderProgram program) {
64 return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); 42 return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program)));
65} 43}
66 44
67ShaderType GetShaderType(Maxwell::ShaderProgram program) { 45ShaderType GetShaderType(Maxwell::ShaderProgram program) {
@@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
81 return ShaderType::Vertex; 59 return ShaderType::Vertex;
82 } 60 }
83} 61}
84
85template <VkDescriptorType descriptor_type, class Container>
86void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding,
87 VkShaderStageFlags stage_flags, const Container& container) {
88 const u32 num_entries = static_cast<u32>(std::size(container));
89 for (std::size_t i = 0; i < num_entries; ++i) {
90 u32 count = 1;
91 if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
92 // Combined image samplers can be arrayed.
93 count = container[i].size;
94 }
95 bindings.push_back({
96 .binding = binding++,
97 .descriptorType = descriptor_type,
98 .descriptorCount = count,
99 .stageFlags = stage_flags,
100 .pImmutableSamplers = nullptr,
101 });
102 }
103}
104
105u32 FillDescriptorLayout(const ShaderEntries& entries,
106 std::vector<VkDescriptorSetLayoutBinding>& bindings,
107 Maxwell::ShaderProgram program_type, u32 base_binding) {
108 const ShaderType stage = GetStageFromProgram(program_type);
109 const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
110
111 u32 binding = base_binding;
112 AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
113 AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
114 AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
115 AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
116 AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
117 AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
118 return binding;
119}
120
121} // Anonymous namespace 62} // Anonymous namespace
122 63
123std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { 64size_t ComputePipelineCacheKey::Hash() const noexcept {
124 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
125 return static_cast<std::size_t>(hash);
126}
127
128bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
129 return std::memcmp(&rhs, this, Size()) == 0;
130}
131
132std::size_t ComputePipelineCacheKey::Hash() const noexcept {
133 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); 65 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
134 return static_cast<std::size_t>(hash); 66 return static_cast<size_t>(hash);
135} 67}
136 68
137bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { 69bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
138 return std::memcmp(&rhs, this, sizeof *this) == 0; 70 return std::memcmp(&rhs, this, sizeof *this) == 0;
139} 71}
140 72
141Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, 73Shader::Shader() = default;
142 GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
143 : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
144 shader_ir(program_code, main_offset_, compiler_settings, registry),
145 entries(GenerateShaderEntries(shader_ir)) {}
146 74
147Shader::~Shader() = default; 75Shader::~Shader() = default;
148 76
149VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, 77PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
150 Tegra::Engines::Maxwell3D& maxwell3d_, 78 Tegra::Engines::Maxwell3D& maxwell3d_,
151 Tegra::Engines::KeplerCompute& kepler_compute_, 79 Tegra::Engines::KeplerCompute& kepler_compute_,
152 Tegra::MemoryManager& gpu_memory_, const Device& device_, 80 Tegra::MemoryManager& gpu_memory_, const Device& device_,
153 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, 81 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
154 VKUpdateDescriptorQueue& update_descriptor_queue_) 82 VKUpdateDescriptorQueue& update_descriptor_queue_)
155 : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, 83 : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
156 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, 84 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
157 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ 85 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
158 update_descriptor_queue_} {} 86 update_descriptor_queue_} {}
159 87
160VKPipelineCache::~VKPipelineCache() = default; 88PipelineCache::~PipelineCache() = default;
161
162std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
163 std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
164
165 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
166 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
167
168 // Skip stages that are not enabled
169 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
170 continue;
171 }
172
173 const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
174 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
175 ASSERT(cpu_addr);
176
177 Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
178 if (!result) {
179 const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
180
181 // No shader found - create a new one
182 static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
183 const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
184 ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
185 const std::size_t size_in_bytes = code.size() * sizeof(u64);
186
187 auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
188 std::move(code), stage_offset);
189 result = shader.get();
190
191 if (cpu_addr) {
192 Register(std::move(shader), *cpu_addr, size_in_bytes);
193 } else {
194 null_shader = std::move(shader);
195 }
196 }
197 shaders[index] = result;
198 }
199 return last_shaders = shaders;
200}
201
202VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
203 const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
204 VideoCommon::Shader::AsyncShaders& async_shaders) {
205 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
206
207 if (last_graphics_pipeline && last_graphics_key == key) {
208 return last_graphics_pipeline;
209 }
210 last_graphics_key = key;
211
212 if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
213 std::unique_lock lock{pipeline_cache};
214 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
215 if (is_cache_miss) {
216 gpu.ShaderNotify().MarkSharderBuilding();
217 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
218 const auto [program, bindings] = DecompileShaders(key.fixed_state);
219 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
220 update_descriptor_queue, bindings, program, key,
221 num_color_buffers);
222 }
223 last_graphics_pipeline = pair->second.get();
224 return last_graphics_pipeline;
225 }
226
227 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
228 auto& entry = pair->second;
229 if (is_cache_miss) {
230 gpu.ShaderNotify().MarkSharderBuilding();
231 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
232 const auto [program, bindings] = DecompileShaders(key.fixed_state);
233 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
234 update_descriptor_queue, key, bindings,
235 program, num_color_buffers);
236 gpu.ShaderNotify().MarkShaderComplete();
237 }
238 last_graphics_pipeline = entry.get();
239 return last_graphics_pipeline;
240}
241 89
242VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { 90ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
243 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 91 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
244 92
245 const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); 93 const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
@@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
248 return *entry; 96 return *entry;
249 } 97 }
250 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 98 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
251 99 throw "Bad";
252 const GPUVAddr gpu_addr = key.shader;
253
254 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
255 ASSERT(cpu_addr);
256
257 Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
258 if (!shader) {
259 // No shader found - create a new one
260 const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
261
262 ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
263 const std::size_t size_in_bytes = code.size() * sizeof(u64);
264
265 auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
266 *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
267 shader = shader_info.get();
268
269 if (cpu_addr) {
270 Register(std::move(shader_info), *cpu_addr, size_in_bytes);
271 } else {
272 null_kernel = std::move(shader_info);
273 }
274 }
275
276 const Specialization specialization{
277 .base_binding = 0,
278 .workgroup_size = key.workgroup_size,
279 .shared_memory_size = key.shared_memory_size,
280 .point_size = std::nullopt,
281 .enabled_attributes = {},
282 .attribute_types = {},
283 .ndc_minus_one_to_one = false,
284 };
285 const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
286 shader->GetRegistry(), specialization),
287 shader->GetEntries()};
288 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
289 update_descriptor_queue, spirv_shader);
290 return *entry;
291}
292
293void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
294 gpu.ShaderNotify().MarkShaderComplete();
295 std::unique_lock lock{pipeline_cache};
296 graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
297}
298
299void VKPipelineCache::OnShaderRemoval(Shader* shader) {
300 bool finished = false;
301 const auto Finish = [&] {
302 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
303 // flush.
304 if (finished) {
305 return;
306 }
307 finished = true;
308 scheduler.Finish();
309 };
310
311 const GPUVAddr invalidated_addr = shader->GetGpuAddr();
312 for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
313 auto& entry = it->first;
314 if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
315 entry.shaders.end()) {
316 ++it;
317 continue;
318 }
319 Finish();
320 it = graphics_cache.erase(it);
321 }
322 for (auto it = compute_cache.begin(); it != compute_cache.end();) {
323 auto& entry = it->first;
324 if (entry.shader != invalidated_addr) {
325 ++it;
326 continue;
327 }
328 Finish();
329 it = compute_cache.erase(it);
330 }
331}
332
333std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
334VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
335 Specialization specialization;
336 if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
337 float point_size;
338 std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
339 specialization.point_size = point_size;
340 ASSERT(point_size != 0.0f);
341 }
342 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
343 const auto& attribute = fixed_state.attributes[i];
344 specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
345 specialization.attribute_types[i] = attribute.Type();
346 }
347 specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
348 specialization.early_fragment_tests = fixed_state.early_z;
349
350 // Alpha test
351 specialization.alpha_test_func =
352 FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
353 specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
354
355 SPIRVProgram program;
356 std::vector<VkDescriptorSetLayoutBinding> bindings;
357
358 for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) {
359 const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
360 // Skip stages that are not enabled
361 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
362 continue;
363 }
364 const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
365 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
366 Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
367
368 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
369 const ShaderType program_type = GetShaderType(program_enum);
370 const auto& entries = shader->GetEntries();
371 program[stage] = {
372 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
373 entries,
374 };
375
376 const u32 old_binding = specialization.base_binding;
377 specialization.base_binding =
378 FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
379 ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
380 }
381 return {std::move(program), std::move(bindings)};
382} 100}
383 101
384template <VkDescriptorType descriptor_type, class Container> 102void PipelineCache::OnShaderRemoval(Shader*) {}
385void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding,
386 u32& offset, const Container& container) {
387 static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
388 const u32 count = static_cast<u32>(std::size(container));
389
390 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
391 for (u32 i = 0; i < count; ++i) {
392 const u32 num_samplers = container[i].size;
393 template_entries.push_back({
394 .dstBinding = binding,
395 .dstArrayElement = 0,
396 .descriptorCount = num_samplers,
397 .descriptorType = descriptor_type,
398 .offset = offset,
399 .stride = entry_size,
400 });
401
402 ++binding;
403 offset += num_samplers * entry_size;
404 }
405 return;
406 }
407
408 if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
409 descriptor_type == STORAGE_TEXEL_BUFFER) {
 410        // Nvidia has a bug where updating multiple texel buffer descriptors at once crashes the driver.
411 // Note: Fixed in driver Windows 443.24, Linux 440.66.15
412 for (u32 i = 0; i < count; ++i) {
413 template_entries.push_back({
414 .dstBinding = binding + i,
415 .dstArrayElement = 0,
416 .descriptorCount = 1,
417 .descriptorType = descriptor_type,
418 .offset = static_cast<std::size_t>(offset + i * entry_size),
419 .stride = entry_size,
420 });
421 }
422 } else if (count > 0) {
423 template_entries.push_back({
424 .dstBinding = binding,
425 .dstArrayElement = 0,
426 .descriptorCount = count,
427 .descriptorType = descriptor_type,
428 .offset = offset,
429 .stride = entry_size,
430 });
431 }
432 offset += count * entry_size;
433 binding += count;
434}
435
436void FillDescriptorUpdateTemplateEntries(
437 const ShaderEntries& entries, u32& binding, u32& offset,
438 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
 439    AddEntry<UNIFORM_BUFFER>(template_entries, binding, offset, entries.const_buffers);
 440    AddEntry<STORAGE_BUFFER>(template_entries, binding, offset, entries.global_buffers);
 441    AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, binding, offset, entries.uniform_texels);
 442    AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, binding, offset, entries.samplers);
 443    AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, binding, offset, entries.storage_texels);
 444    AddEntry<STORAGE_IMAGE>(template_entries, binding, offset, entries.images);
445}
446 103
447} // namespace Vulkan 104} // namespace Vulkan
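Both pipeline caches rely on the same lookup idiom: try_emplace default-constructs a slot and reports whether the key was newly inserted, so the expensive compilation runs exactly once per key. A reduced sketch with stand-in types (hypothetical, not yuzu code):

#include <memory>
#include <string>
#include <unordered_map>

std::unordered_map<std::string, std::unique_ptr<int>> cache;

int& GetOrCompile(const std::string& key) {
    // try_emplace returns {iterator, inserted}; "inserted" doubles as the
    // cache-miss flag, and the new slot holds a null unique_ptr until filled.
    const auto [pair, is_cache_miss] = cache.try_emplace(key);
    auto& entry = pair->second;
    if (is_cache_miss) {
        entry = std::make_unique<int>(static_cast<int>(key.size())); // stand-in for compilation
    }
    return *entry;
}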
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 89d635a3d..e3e63340d 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -15,15 +15,8 @@
15#include <boost/functional/hash.hpp> 15#include <boost/functional/hash.hpp>
16 16
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "video_core/engines/const_buffer_engine_interface.h"
19#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 19#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
22#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
23#include "video_core/shader/async_shaders.h"
24#include "video_core/shader/memory_util.h"
25#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h"
27#include "video_core/shader_cache.h" 20#include "video_core/shader_cache.h"
28#include "video_core/vulkan_common/vulkan_wrapper.h" 21#include "video_core/vulkan_common/vulkan_wrapper.h"
29 22
@@ -35,7 +28,7 @@ namespace Vulkan {
35 28
36class Device; 29class Device;
37class RasterizerVulkan; 30class RasterizerVulkan;
38class VKComputePipeline; 31class ComputePipeline;
39class VKDescriptorPool; 32class VKDescriptorPool;
40class VKScheduler; 33class VKScheduler;
41class VKUpdateDescriptorQueue; 34class VKUpdateDescriptorQueue;
@@ -47,7 +40,7 @@ struct ComputePipelineCacheKey {
47 u32 shared_memory_size; 40 u32 shared_memory_size;
48 std::array<u32, 3> workgroup_size; 41 std::array<u32, 3> workgroup_size;
49 42
50 std::size_t Hash() const noexcept; 43 size_t Hash() const noexcept;
51 44
52 bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; 45 bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
53 46
@@ -64,15 +57,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
64namespace std { 57namespace std {
65 58
66template <> 59template <>
67struct hash<Vulkan::GraphicsPipelineCacheKey> {
68 std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
69 return k.Hash();
70 }
71};
72
73template <>
74struct hash<Vulkan::ComputePipelineCacheKey> { 60struct hash<Vulkan::ComputePipelineCacheKey> {
75 std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { 61 size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
76 return k.Hash(); 62 return k.Hash();
77 } 63 }
78}; 64};
@@ -83,66 +69,26 @@ namespace Vulkan {
83 69
84class Shader { 70class Shader {
85public: 71public:
86 explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, 72 explicit Shader();
87 Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_,
88 VideoCommon::Shader::ProgramCode program_code, u32 main_offset_);
89 ~Shader(); 73 ~Shader();
90
91 GPUVAddr GetGpuAddr() const {
92 return gpu_addr;
93 }
94
95 VideoCommon::Shader::ShaderIR& GetIR() {
96 return shader_ir;
97 }
98
99 const VideoCommon::Shader::ShaderIR& GetIR() const {
100 return shader_ir;
101 }
102
103 const VideoCommon::Shader::Registry& GetRegistry() const {
104 return registry;
105 }
106
107 const ShaderEntries& GetEntries() const {
108 return entries;
109 }
110
111private:
112 GPUVAddr gpu_addr{};
113 VideoCommon::Shader::ProgramCode program_code;
114 VideoCommon::Shader::Registry registry;
115 VideoCommon::Shader::ShaderIR shader_ir;
116 ShaderEntries entries;
117}; 74};
118 75
119class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { 76class PipelineCache final : public VideoCommon::ShaderCache<Shader> {
120public: 77public:
121 explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, 78 explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
122 Tegra::Engines::Maxwell3D& maxwell3d, 79 Tegra::Engines::Maxwell3D& maxwell3d,
123 Tegra::Engines::KeplerCompute& kepler_compute, 80 Tegra::Engines::KeplerCompute& kepler_compute,
124 Tegra::MemoryManager& gpu_memory, const Device& device, 81 Tegra::MemoryManager& gpu_memory, const Device& device,
125 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, 82 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
126 VKUpdateDescriptorQueue& update_descriptor_queue); 83 VKUpdateDescriptorQueue& update_descriptor_queue);
127 ~VKPipelineCache() override; 84 ~PipelineCache() override;
128
129 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
130 85
131 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, 86 ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
132 u32 num_color_buffers,
133 VideoCommon::Shader::AsyncShaders& async_shaders);
134
135 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
136
137 void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
138 87
139protected: 88protected:
140 void OnShaderRemoval(Shader* shader) final; 89 void OnShaderRemoval(Shader* shader) final;
141 90
142private: 91private:
143 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
144 const FixedPipelineState& fixed_state);
145
146 Tegra::GPU& gpu; 92 Tegra::GPU& gpu;
147 Tegra::Engines::Maxwell3D& maxwell3d; 93 Tegra::Engines::Maxwell3D& maxwell3d;
148 Tegra::Engines::KeplerCompute& kepler_compute; 94 Tegra::Engines::KeplerCompute& kepler_compute;
@@ -158,17 +104,8 @@ private:
158 104
159 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; 105 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
160 106
161 GraphicsPipelineCacheKey last_graphics_key;
162 VKGraphicsPipeline* last_graphics_pipeline = nullptr;
163
164 std::mutex pipeline_cache; 107 std::mutex pipeline_cache;
165 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> 108 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
166 graphics_cache;
167 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
168}; 109};
169 110
170void FillDescriptorUpdateTemplateEntries(
171 const ShaderEntries& entries, u32& binding, u32& offset,
172 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries);
173
174} // namespace Vulkan 111} // namespace Vulkan
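The surviving ComputePipelineCacheKey follows a common C++ caching pattern: a trivially copyable key whose std::hash specialization forwards to a member Hash(), so the key can index the compute_cache unordered_map. A self-contained sketch of that pattern, assuming a padding-free key and a 64-bit size_t; ExampleKey and its FNV-1a hash are illustrative, not yuzu code.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>

struct ExampleKey {
    std::uint32_t shader_id;
    std::uint32_t workgroup[3];

    std::size_t Hash() const noexcept {
        // FNV-1a over the raw bytes; sound here because the struct has
        // no padding, matching the bytewise operator== below.
        std::size_t hash = 14695981039346656037ULL;
        const auto* bytes = reinterpret_cast<const unsigned char*>(this);
        for (std::size_t i = 0; i < sizeof(*this); ++i) {
            hash = (hash ^ bytes[i]) * 1099511628211ULL;
        }
        return hash;
    }

    bool operator==(const ExampleKey& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof(*this)) == 0;
    }
};

namespace std {
template <>
struct hash<ExampleKey> {
    size_t operator()(const ExampleKey& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std

With the specialization in place, std::unordered_map<ExampleKey, T> works out of the box, which is exactly how compute_cache is declared above.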
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f57c15b37..f152297d9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -24,7 +24,6 @@
24#include "video_core/renderer_vulkan/vk_buffer_cache.h" 24#include "video_core/renderer_vulkan/vk_buffer_cache.h"
25#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 25#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
26#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 26#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
27#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
28#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 27#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
29#include "video_core/renderer_vulkan/vk_rasterizer.h" 28#include "video_core/renderer_vulkan/vk_rasterizer.h"
30#include "video_core/renderer_vulkan/vk_scheduler.h" 29#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
97 return scissor; 96 return scissor;
98} 97}
99 98
100std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
101 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
102 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
103 for (size_t i = 0; i < std::size(addresses); ++i) {
104 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
105 }
106 return addresses;
107}
108
109struct TextureHandle { 99struct TextureHandle {
110 constexpr TextureHandle(u32 data, bool via_header_index) { 100 constexpr TextureHandle(u32 data, bool via_header_index) {
111 const Tegra::Texture::TextureHandle handle{data}; 101 const Tegra::Texture::TextureHandle handle{data};
@@ -117,98 +107,6 @@ struct TextureHandle {
117 u32 sampler; 107 u32 sampler;
118}; 108};
119 109
120template <typename Engine, typename Entry>
121TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
122 size_t stage, size_t index = 0) {
123 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
124 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
125 if (entry.is_separated) {
126 const u32 buffer_1 = entry.buffer;
127 const u32 buffer_2 = entry.secondary_buffer;
128 const u32 offset_1 = entry.offset;
129 const u32 offset_2 = entry.secondary_offset;
130 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
131 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
132 return TextureHandle(handle_1 | handle_2, via_header_index);
133 }
134 }
135 if (entry.is_bindless) {
136 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
137 return TextureHandle(raw, via_header_index);
138 }
139 const u32 buffer = engine.GetBoundBuffer();
140 const u64 offset = (entry.offset + index) * sizeof(u32);
141 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
142}
143
144ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
145 if (entry.is_buffer) {
146 return ImageViewType::e2D;
147 }
148 switch (entry.type) {
149 case Tegra::Shader::TextureType::Texture1D:
150 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
151 case Tegra::Shader::TextureType::Texture2D:
152 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
153 case Tegra::Shader::TextureType::Texture3D:
154 return ImageViewType::e3D;
155 case Tegra::Shader::TextureType::TextureCube:
156 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
157 }
158 UNREACHABLE();
159 return ImageViewType::e2D;
160}
161
162ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
163 switch (entry.type) {
164 case Tegra::Shader::ImageType::Texture1D:
165 return ImageViewType::e1D;
166 case Tegra::Shader::ImageType::Texture1DArray:
167 return ImageViewType::e1DArray;
168 case Tegra::Shader::ImageType::Texture2D:
169 return ImageViewType::e2D;
170 case Tegra::Shader::ImageType::Texture2DArray:
171 return ImageViewType::e2DArray;
172 case Tegra::Shader::ImageType::Texture3D:
173 return ImageViewType::e3D;
174 case Tegra::Shader::ImageType::TextureBuffer:
175 return ImageViewType::Buffer;
176 }
177 UNREACHABLE();
178 return ImageViewType::e2D;
179}
180
181void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
182 VKUpdateDescriptorQueue& update_descriptor_queue,
183 ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
184 for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
185 const ImageViewId image_view_id = *image_view_id_ptr++;
186 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
187 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
188 }
189 for (const auto& entry : entries.samplers) {
190 for (size_t i = 0; i < entry.size; ++i) {
191 const VkSampler sampler = *sampler_ptr++;
192 const ImageViewId image_view_id = *image_view_id_ptr++;
193 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
194 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
195 update_descriptor_queue.AddSampledImage(handle, sampler);
196 }
197 }
198 for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
199 const ImageViewId image_view_id = *image_view_id_ptr++;
200 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
201 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
202 }
203 for (const auto& entry : entries.images) {
204 // TODO: Mark as modified
205 const ImageViewId image_view_id = *image_view_id_ptr++;
206 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
207 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
208 update_descriptor_queue.AddImage(handle);
209 }
210}
211
212DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, 110DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
213 bool is_indexed) { 111 bool is_indexed) {
214 DrawParams params{ 112 DrawParams params{
@@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
253 descriptor_pool, update_descriptor_queue), 151 descriptor_pool, update_descriptor_queue),
254 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, 152 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
255 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), 153 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
256 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 154 wfi_event(device.GetLogical().CreateEvent()) {
257 scheduler.SetQueryCache(query_cache); 155 scheduler.SetQueryCache(query_cache);
258 if (device.UseAsynchronousShaders()) {
259 async_shaders.AllocateWorkers();
260 }
261} 156}
262 157
263RasterizerVulkan::~RasterizerVulkan() = default; 158RasterizerVulkan::~RasterizerVulkan() = default;
264 159
265void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { 160void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
266 MICROPROFILE_SCOPE(Vulkan_Drawing); 161 UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced);
267
268 SCOPE_EXIT({ gpu.TickWork(); });
269 FlushWork();
270
271 query_cache.UpdateCounters();
272
273 graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
274
275 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
276
277 texture_cache.SynchronizeGraphicsDescriptors();
278 texture_cache.UpdateRenderTargets(false);
279
280 const auto shaders = pipeline_cache.GetShaders();
281 graphics_key.shaders = GetShaderAddresses(shaders);
282
283 SetupShaderDescriptors(shaders, is_indexed);
284
285 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
286 graphics_key.renderpass = framebuffer->RenderPass();
287
288 VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
289 graphics_key, framebuffer->NumColorBuffers(), async_shaders);
290 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
291 // Async graphics pipeline was not ready.
292 return;
293 }
294
295 BeginTransformFeedback();
296
297 scheduler.RequestRenderpass(framebuffer);
298 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
299 UpdateDynamicStates();
300
301 const auto& regs = maxwell3d.regs;
302 const u32 num_instances = maxwell3d.mme_draw.instance_count;
303 const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
304 const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
305 const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
306 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
307 if (descriptor_set) {
308 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
309 DESCRIPTOR_SET, descriptor_set, nullptr);
310 }
311 if (draw_params.is_indexed) {
312 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
313 draw_params.base_vertex, draw_params.base_instance);
314 } else {
315 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
316 draw_params.base_vertex, draw_params.base_instance);
317 }
318 });
319
320 EndTransformFeedback();
321} 162}
322 163
323void RasterizerVulkan::Clear() { 164void RasterizerVulkan::Clear() {
@@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() {
395 }); 236 });
396} 237}
397 238
398void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 239void RasterizerVulkan::DispatchCompute() {
399 MICROPROFILE_SCOPE(Vulkan_Compute); 240 UNREACHABLE_MSG("Not implemented");
400
401 query_cache.UpdateCounters();
402
403 const auto& launch_desc = kepler_compute.launch_description;
404 auto& pipeline = pipeline_cache.GetComputePipeline({
405 .shader = code_addr,
406 .shared_memory_size = launch_desc.shared_alloc,
407 .workgroup_size{
408 launch_desc.block_dim_x,
409 launch_desc.block_dim_y,
410 launch_desc.block_dim_z,
411 },
412 });
413
414 // Compute dispatches can't be executed inside a renderpass
415 scheduler.RequestOutsideRenderPassOperationContext();
416
417 image_view_indices.clear();
418 sampler_handles.clear();
419
420 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
421
422 const auto& entries = pipeline.GetEntries();
423 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
424 buffer_cache.UnbindComputeStorageBuffers();
425 u32 ssbo_index = 0;
426 for (const auto& buffer : entries.global_buffers) {
427 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
428 buffer.is_written);
429 ++ssbo_index;
430 }
431 buffer_cache.UpdateComputeBuffers();
432
433 texture_cache.SynchronizeComputeDescriptors();
434
435 SetupComputeUniformTexels(entries);
436 SetupComputeTextures(entries);
437 SetupComputeStorageTexels(entries);
438 SetupComputeImages(entries);
439
440 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
441 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
442
443 update_descriptor_queue.Acquire();
444
445 buffer_cache.BindHostComputeBuffers();
446
447 ImageViewId* image_view_id_ptr = image_view_ids.data();
448 VkSampler* sampler_ptr = sampler_handles.data();
449 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
450 sampler_ptr);
451
452 const VkPipeline pipeline_handle = pipeline.GetHandle();
453 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
454 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
455 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
456 grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
457 descriptor_set](vk::CommandBuffer cmdbuf) {
458 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
459 if (descriptor_set) {
460 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
461 DESCRIPTOR_SET, descriptor_set, nullptr);
462 }
463 cmdbuf.Dispatch(grid_x, grid_y, grid_z);
464 });
465} 241}
466 242
467void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { 243void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
@@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
716 return buffer_cache.DMACopy(src_address, dest_address, amount); 492 return buffer_cache.DMACopy(src_address, dest_address, amount);
717} 493}
718 494
719void RasterizerVulkan::SetupShaderDescriptors(
720 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
721 image_view_indices.clear();
722 sampler_handles.clear();
723 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
724 Shader* const shader = shaders[stage + 1];
725 if (!shader) {
726 continue;
727 }
728 const ShaderEntries& entries = shader->GetEntries();
729 SetupGraphicsUniformTexels(entries, stage);
730 SetupGraphicsTextures(entries, stage);
731 SetupGraphicsStorageTexels(entries, stage);
732 SetupGraphicsImages(entries, stage);
733
734 buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
735 buffer_cache.UnbindGraphicsStorageBuffers(stage);
736 u32 ssbo_index = 0;
737 for (const auto& buffer : entries.global_buffers) {
738 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
739 buffer.cbuf_offset, buffer.is_written);
740 ++ssbo_index;
741 }
742 }
743 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
744 buffer_cache.UpdateGraphicsBuffers(is_indexed);
745 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
746
747 buffer_cache.BindHostGeometryBuffers(is_indexed);
748
749 update_descriptor_queue.Acquire();
750
751 ImageViewId* image_view_id_ptr = image_view_ids.data();
752 VkSampler* sampler_ptr = sampler_handles.data();
753 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
754 // Skip VertexA stage
755 Shader* const shader = shaders[stage + 1];
756 if (!shader) {
757 continue;
758 }
759 buffer_cache.BindHostStageBuffers(stage);
760 PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
761 image_view_id_ptr, sampler_ptr);
762 }
763}
764
765void RasterizerVulkan::UpdateDynamicStates() { 495void RasterizerVulkan::UpdateDynamicStates() {
766 auto& regs = maxwell3d.regs; 496 auto& regs = maxwell3d.regs;
767 UpdateViewportsState(regs); 497 UpdateViewportsState(regs);
@@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() {
810 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 540 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
811} 541}
812 542
813void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
814 const auto& regs = maxwell3d.regs;
815 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
816 for (const auto& entry : entries.uniform_texels) {
817 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
818 image_view_indices.push_back(handle.image);
819 }
820}
821
822void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
823 const auto& regs = maxwell3d.regs;
824 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
825 for (const auto& entry : entries.samplers) {
826 for (size_t index = 0; index < entry.size; ++index) {
827 const TextureHandle handle =
828 GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
829 image_view_indices.push_back(handle.image);
830
831 Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
832 sampler_handles.push_back(sampler->Handle());
833 }
834 }
835}
836
837void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
838 const auto& regs = maxwell3d.regs;
839 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
840 for (const auto& entry : entries.storage_texels) {
841 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
842 image_view_indices.push_back(handle.image);
843 }
844}
845
846void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
847 const auto& regs = maxwell3d.regs;
848 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
849 for (const auto& entry : entries.images) {
850 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
851 image_view_indices.push_back(handle.image);
852 }
853}
854
855void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
856 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
857 for (const auto& entry : entries.uniform_texels) {
858 const TextureHandle handle =
859 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
860 image_view_indices.push_back(handle.image);
861 }
862}
863
864void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
865 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
866 for (const auto& entry : entries.samplers) {
867 for (size_t index = 0; index < entry.size; ++index) {
868 const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
869 COMPUTE_SHADER_INDEX, index);
870 image_view_indices.push_back(handle.image);
871
872 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
873 sampler_handles.push_back(sampler->Handle());
874 }
875 }
876}
877
878void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
879 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
880 for (const auto& entry : entries.storage_texels) {
881 const TextureHandle handle =
882 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
883 image_view_indices.push_back(handle.image);
884 }
885}
886
887void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
888 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
889 for (const auto& entry : entries.images) {
890 const TextureHandle handle =
891 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
892 image_view_indices.push_back(handle.image);
893 }
894}
895
896void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 543void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
897 if (!state_tracker.TouchViewports()) { 544 if (!state_tracker.TouchViewports()) {
898 return; 545 return;
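A note on the TextureHandle helpers deleted above: a Maxwell texture handle is one 32-bit word packing a texture (TIC) index and a sampler (TSC) index, and in via_header_index mode the same raw value indexes both tables. A hedged sketch of that decode, with field widths assumed from the removed constructor's behavior rather than taken from yuzu's headers:

#include <cstdint>

struct UnpackedHandle {
    std::uint32_t image;   // TIC table index
    std::uint32_t sampler; // TSC table index
};

constexpr UnpackedHandle Unpack(std::uint32_t raw, bool via_header_index) {
    if (via_header_index) {
        // Header-index mode: the raw value indexes both tables directly.
        return {raw, raw};
    }
    const std::uint32_t tic_id = raw & 0xFFFFFu;       // low 20 bits
    const std::uint32_t tsc_id = (raw >> 20) & 0xFFFu; // next 12 bits
    return {tic_id, tsc_id};
}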
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 2065209be..31017dc2b 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -28,7 +28,6 @@
28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
29#include "video_core/renderer_vulkan/vk_texture_cache.h" 29#include "video_core/renderer_vulkan/vk_texture_cache.h"
30#include "video_core/renderer_vulkan/vk_update_descriptor.h" 30#include "video_core/renderer_vulkan/vk_update_descriptor.h"
31#include "video_core/shader/async_shaders.h"
32#include "video_core/vulkan_common/vulkan_memory_allocator.h" 31#include "video_core/vulkan_common/vulkan_memory_allocator.h"
33#include "video_core/vulkan_common/vulkan_wrapper.h" 32#include "video_core/vulkan_common/vulkan_wrapper.h"
34 33
@@ -73,7 +72,7 @@ public:
73 72
74 void Draw(bool is_indexed, bool is_instanced) override; 73 void Draw(bool is_indexed, bool is_instanced) override;
75 void Clear() override; 74 void Clear() override;
76 void DispatchCompute(GPUVAddr code_addr) override; 75 void DispatchCompute() override;
77 void ResetCounter(VideoCore::QueryType type) override; 76 void ResetCounter(VideoCore::QueryType type) override;
78 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 77 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
79 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 78 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -103,19 +102,6 @@ public:
103 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 102 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
104 u32 pixel_stride) override; 103 u32 pixel_stride) override;
105 104
106 VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
107 return async_shaders;
108 }
109
110 const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
111 return async_shaders;
112 }
113
114 /// Maximum supported size that a constbuffer can have in bytes.
115 static constexpr size_t MaxConstbufferSize = 0x10000;
116 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
117 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
118
119private: 105private:
120 static constexpr size_t MAX_TEXTURES = 192; 106 static constexpr size_t MAX_TEXTURES = 192;
121 static constexpr size_t MAX_IMAGES = 48; 107 static constexpr size_t MAX_IMAGES = 48;
@@ -125,40 +111,12 @@ private:
125 111
126 void FlushWork(); 112 void FlushWork();
127 113
128 /// Setup descriptors in the graphics pipeline.
129 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
130 bool is_indexed);
131
132 void UpdateDynamicStates(); 114 void UpdateDynamicStates();
133 115
134 void BeginTransformFeedback(); 116 void BeginTransformFeedback();
135 117
136 void EndTransformFeedback(); 118 void EndTransformFeedback();
137 119
138 /// Setup uniform texels in the graphics pipeline.
139 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
140
141 /// Setup textures in the graphics pipeline.
142 void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
143
144 /// Setup storage texels in the graphics pipeline.
145 void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
146
147 /// Setup images in the graphics pipeline.
148 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
149
150 /// Setup texel buffers in the compute pipeline.
151 void SetupComputeUniformTexels(const ShaderEntries& entries);
152
153 /// Setup textures in the compute pipeline.
154 void SetupComputeTextures(const ShaderEntries& entries);
155
156 /// Setup storage texels in the compute pipeline.
157 void SetupComputeStorageTexels(const ShaderEntries& entries);
158
159 /// Setup images in the compute pipeline.
160 void SetupComputeImages(const ShaderEntries& entries);
161
162 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 120 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
163 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 121 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
164 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 122 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -198,13 +156,12 @@ private:
198 TextureCache texture_cache; 156 TextureCache texture_cache;
199 BufferCacheRuntime buffer_cache_runtime; 157 BufferCacheRuntime buffer_cache_runtime;
200 BufferCache buffer_cache; 158 BufferCache buffer_cache;
201 VKPipelineCache pipeline_cache; 159 PipelineCache pipeline_cache;
202 VKQueryCache query_cache; 160 VKQueryCache query_cache;
203 AccelerateDMA accelerate_dma; 161 AccelerateDMA accelerate_dma;
204 VKFenceManager fence_manager; 162 VKFenceManager fence_manager;
205 163
206 vk::Event wfi_event; 164 vk::Event wfi_event;
207 VideoCommon::Shader::AsyncShaders async_shaders;
208 165
209 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; 166 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
210 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; 167 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
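One design note on the members kept above: image_view_indices is a boost::container::static_vector, which stores elements inline up to a fixed capacity, so the per-draw descriptor gathering path never allocates on the heap. A minimal usage sketch:

#include <boost/container/static_vector.hpp>
#include <cstdint>

void GatherSketch() {
    boost::container::static_vector<std::uint32_t, 8> indices;
    indices.push_back(3); // stored inline; exceeding the capacity of 8 throws
    indices.clear();      // reusable without releasing any storage
}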
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
deleted file mode 100644
index db11144c7..000000000
--- a/src/video_core/shader/ast.cpp
+++ /dev/null
@@ -1,752 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6#include <string_view>
7
8#include <fmt/format.h>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "video_core/shader/ast.h"
13#include "video_core/shader/expr.h"
14
15namespace VideoCommon::Shader {
16
17ASTZipper::ASTZipper() = default;
18
19void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
20 ASSERT(new_first->manager == nullptr);
21 first = new_first;
22 last = new_first;
23
24 ASTNode current = first;
25 while (current) {
26 current->manager = this;
27 current->parent = parent;
28 last = current;
29 current = current->next;
30 }
31}
32
33void ASTZipper::PushBack(const ASTNode new_node) {
34 ASSERT(new_node->manager == nullptr);
35 new_node->previous = last;
36 if (last) {
37 last->next = new_node;
38 }
39 new_node->next.reset();
40 last = new_node;
41 if (!first) {
42 first = new_node;
43 }
44 new_node->manager = this;
45}
46
47void ASTZipper::PushFront(const ASTNode new_node) {
48 ASSERT(new_node->manager == nullptr);
49 new_node->previous.reset();
50 new_node->next = first;
51 if (first) {
52 first->previous = new_node;
53 }
54 if (last == first) {
55 last = new_node;
56 }
57 first = new_node;
58 new_node->manager = this;
59}
60
61void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
62 ASSERT(new_node->manager == nullptr);
63 if (!at_node) {
64 PushFront(new_node);
65 return;
66 }
67 const ASTNode next = at_node->next;
68 if (next) {
69 next->previous = new_node;
70 }
71 new_node->previous = at_node;
72 if (at_node == last) {
73 last = new_node;
74 }
75 new_node->next = next;
76 at_node->next = new_node;
77 new_node->manager = this;
78}
79
80void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
81 ASSERT(new_node->manager == nullptr);
82 if (!at_node) {
83 PushBack(new_node);
84 return;
85 }
86 const ASTNode previous = at_node->previous;
87 if (previous) {
88 previous->next = new_node;
89 }
90 new_node->next = at_node;
91 if (at_node == first) {
92 first = new_node;
93 }
94 new_node->previous = previous;
95 at_node->previous = new_node;
96 new_node->manager = this;
97}
98
99void ASTZipper::DetachTail(ASTNode node) {
100 ASSERT(node->manager == this);
101 if (node == first) {
102 first.reset();
103 last.reset();
104 return;
105 }
106
107 last = node->previous;
108 last->next.reset();
109 node->previous.reset();
110
111 ASTNode current = std::move(node);
112 while (current) {
113 current->manager = nullptr;
114 current->parent.reset();
115 current = current->next;
116 }
117}
118
119void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
120 ASSERT(start->manager == this && end->manager == this);
121 if (start == end) {
122 DetachSingle(start);
123 return;
124 }
125 const ASTNode prev = start->previous;
126 const ASTNode post = end->next;
127 if (!prev) {
128 first = post;
129 } else {
130 prev->next = post;
131 }
132 if (!post) {
133 last = prev;
134 } else {
135 post->previous = prev;
136 }
137 start->previous.reset();
138 end->next.reset();
139 ASTNode current = start;
140 bool found = false;
141 while (current) {
142 current->manager = nullptr;
143 current->parent.reset();
144 found |= current == end;
145 current = current->next;
146 }
147 ASSERT(found);
148}
149
150void ASTZipper::DetachSingle(const ASTNode node) {
151 ASSERT(node->manager == this);
152 const ASTNode prev = node->previous;
153 const ASTNode post = node->next;
154 node->previous.reset();
155 node->next.reset();
156 if (!prev) {
157 first = post;
158 } else {
159 prev->next = post;
160 }
161 if (!post) {
162 last = prev;
163 } else {
164 post->previous = prev;
165 }
166
167 node->manager = nullptr;
168 node->parent.reset();
169}
170
171void ASTZipper::Remove(const ASTNode node) {
172 ASSERT(node->manager == this);
173 const ASTNode next = node->next;
174 const ASTNode previous = node->previous;
175 if (previous) {
176 previous->next = next;
177 }
178 if (next) {
179 next->previous = previous;
180 }
181 node->parent.reset();
182 node->manager = nullptr;
183 if (node == last) {
184 last = previous;
185 }
186 if (node == first) {
187 first = next;
188 }
189}
190
191class ExprPrinter final {
192public:
193 void operator()(const ExprAnd& expr) {
194 inner += "( ";
195 std::visit(*this, *expr.operand1);
196 inner += " && ";
197 std::visit(*this, *expr.operand2);
198 inner += ')';
199 }
200
201 void operator()(const ExprOr& expr) {
202 inner += "( ";
203 std::visit(*this, *expr.operand1);
204 inner += " || ";
205 std::visit(*this, *expr.operand2);
206 inner += ')';
207 }
208
209 void operator()(const ExprNot& expr) {
210 inner += "!";
211 std::visit(*this, *expr.operand1);
212 }
213
214 void operator()(const ExprPredicate& expr) {
215 inner += fmt::format("P{}", expr.predicate);
216 }
217
218 void operator()(const ExprCondCode& expr) {
219 inner += fmt::format("CC{}", expr.cc);
220 }
221
222 void operator()(const ExprVar& expr) {
223 inner += fmt::format("V{}", expr.var_index);
224 }
225
226 void operator()(const ExprBoolean& expr) {
227 inner += expr.value ? "true" : "false";
228 }
229
230 void operator()(const ExprGprEqual& expr) {
231 inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value);
232 }
233
234 const std::string& GetResult() const {
235 return inner;
236 }
237
238private:
239 std::string inner;
240};
241
242class ASTPrinter {
243public:
244 void operator()(const ASTProgram& ast) {
245 scope++;
246 inner += "program {\n";
247 ASTNode current = ast.nodes.GetFirst();
248 while (current) {
249 Visit(current);
250 current = current->GetNext();
251 }
252 inner += "}\n";
253 scope--;
254 }
255
256 void operator()(const ASTIfThen& ast) {
257 ExprPrinter expr_parser{};
258 std::visit(expr_parser, *ast.condition);
259 inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
260 scope++;
261 ASTNode current = ast.nodes.GetFirst();
262 while (current) {
263 Visit(current);
264 current = current->GetNext();
265 }
266 scope--;
267 inner += fmt::format("{}}}\n", Indent());
268 }
269
270 void operator()(const ASTIfElse& ast) {
271 inner += Indent();
272 inner += "else {\n";
273
274 scope++;
275 ASTNode current = ast.nodes.GetFirst();
276 while (current) {
277 Visit(current);
278 current = current->GetNext();
279 }
280 scope--;
281
282 inner += Indent();
283 inner += "}\n";
284 }
285
286 void operator()(const ASTBlockEncoded& ast) {
287 inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
288 }
289
290 void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
291 inner += Indent();
292 inner += "Block;\n";
293 }
294
295 void operator()(const ASTVarSet& ast) {
296 ExprPrinter expr_parser{};
297 std::visit(expr_parser, *ast.condition);
298 inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
299 }
300
301 void operator()(const ASTLabel& ast) {
302 inner += fmt::format("Label_{}:\n", ast.index);
303 }
304
305 void operator()(const ASTGoto& ast) {
306 ExprPrinter expr_parser{};
307 std::visit(expr_parser, *ast.condition);
308 inner +=
309 fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
310 }
311
312 void operator()(const ASTDoWhile& ast) {
313 ExprPrinter expr_parser{};
314 std::visit(expr_parser, *ast.condition);
315 inner += fmt::format("{}do {{\n", Indent());
316 scope++;
317 ASTNode current = ast.nodes.GetFirst();
318 while (current) {
319 Visit(current);
320 current = current->GetNext();
321 }
322 scope--;
323 inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
324 }
325
326 void operator()(const ASTReturn& ast) {
327 ExprPrinter expr_parser{};
328 std::visit(expr_parser, *ast.condition);
329 inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
330 ast.kills ? "discard" : "exit");
331 }
332
333 void operator()(const ASTBreak& ast) {
334 ExprPrinter expr_parser{};
335 std::visit(expr_parser, *ast.condition);
336 inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
337 }
338
339 void Visit(const ASTNode& node) {
340 std::visit(*this, *node->GetInnerData());
341 }
342
343 const std::string& GetResult() const {
344 return inner;
345 }
346
347private:
348 std::string_view Indent() {
349 if (space_segment_scope == scope) {
350 return space_segment;
351 }
352
353 // Ensure that we don't exceed our view.
354 ASSERT(scope * 2 < spaces.size());
355
356 space_segment = spaces.substr(0, scope * 2);
357 space_segment_scope = scope;
358 return space_segment;
359 }
360
361 std::string inner{};
362 std::string_view space_segment;
363
364 u32 scope{};
365 u32 space_segment_scope{};
366
367 static constexpr std::string_view spaces{"                                                                "};
368};
369
370std::string ASTManager::Print() const {
371 ASTPrinter printer{};
372 printer.Visit(main_node);
373 return printer.GetResult();
374}
375
376ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
377 : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {}
378
379ASTManager::~ASTManager() {
380 Clear();
381}
382
383void ASTManager::Init() {
384 main_node = ASTBase::Make<ASTProgram>(ASTNode{});
385 program = std::get_if<ASTProgram>(main_node->GetInnerData());
386 false_condition = MakeExpr<ExprBoolean>(false);
387}
388
389void ASTManager::DeclareLabel(u32 address) {
390 const auto pair = labels_map.emplace(address, labels_count);
391 if (pair.second) {
392 labels_count++;
393 labels.resize(labels_count);
394 }
395}
396
397void ASTManager::InsertLabel(u32 address) {
398 const u32 index = labels_map[address];
399 const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index);
400 labels[index] = label;
401 program->nodes.PushBack(label);
402}
403
404void ASTManager::InsertGoto(Expr condition, u32 address) {
405 const u32 index = labels_map[address];
406 const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index);
407 gotos.push_back(goto_node);
408 program->nodes.PushBack(goto_node);
409}
410
411void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
412 ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address);
413 program->nodes.PushBack(std::move(block));
414}
415
416void ASTManager::InsertReturn(Expr condition, bool kills) {
417 ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills);
418 program->nodes.PushBack(std::move(node));
419}
420
421// The decompile algorithm is based on
422// "Taming control flow: A structured approach to eliminating goto statements"
423// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be
424// on the same structured level as the label which they jump to. This is done
425// through outward/inward movements and lifting. Once they are at the same
426// level, you can enclose them in an "if" structure or a "do-while" structure.
427void ASTManager::Decompile() {
428 auto it = gotos.begin();
429 while (it != gotos.end()) {
430 const ASTNode goto_node = *it;
431 const auto label_index = goto_node->GetGotoLabel();
432 if (!label_index) {
433 return;
434 }
435 const ASTNode label = labels[*label_index];
436 if (!full_decompile) {
437 // We only decompile backward jumps
438 if (!IsBackwardsJump(goto_node, label)) {
439 it++;
440 continue;
441 }
442 }
443 if (IndirectlyRelated(goto_node, label)) {
444 while (!DirectlyRelated(goto_node, label)) {
445 MoveOutward(goto_node);
446 }
447 }
448 if (DirectlyRelated(goto_node, label)) {
449 u32 goto_level = goto_node->GetLevel();
450 const u32 label_level = label->GetLevel();
451 while (label_level < goto_level) {
452 MoveOutward(goto_node);
453 goto_level--;
454 }
455 // TODO(Blinkhawk): Implement Lifting and Inward Movements
456 }
457 if (label->GetParent() == goto_node->GetParent()) {
458 bool is_loop = false;
459 ASTNode current = goto_node->GetPrevious();
460 while (current) {
461 if (current == label) {
462 is_loop = true;
463 break;
464 }
465 current = current->GetPrevious();
466 }
467
468 if (is_loop) {
469 EncloseDoWhile(goto_node, label);
470 } else {
471 EncloseIfThen(goto_node, label);
472 }
473 it = gotos.erase(it);
474 continue;
475 }
476 it++;
477 }
478 if (full_decompile) {
479 for (const ASTNode& label : labels) {
480 auto& manager = label->GetManager();
481 manager.Remove(label);
482 }
483 labels.clear();
484 } else {
485 auto label_it = labels.begin();
486 while (label_it != labels.end()) {
487 bool can_remove = true;
488 ASTNode label = *label_it;
489 for (const ASTNode& goto_node : gotos) {
490 const auto label_index = goto_node->GetGotoLabel();
491 if (!label_index) {
492 return;
493 }
494 ASTNode& glabel = labels[*label_index];
495 if (glabel == label) {
496 can_remove = false;
497 break;
498 }
499 }
500 if (can_remove) {
501 label->MarkLabelUnused();
502 }
++label_it;
503 }
504 }
505}
506
507bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
508 u32 goto_level = goto_node->GetLevel();
509 u32 label_level = label_node->GetLevel();
510 while (goto_level > label_level) {
511 goto_level--;
512 goto_node = goto_node->GetParent();
513 }
514 while (label_level > goto_level) {
515 label_level--;
516 label_node = label_node->GetParent();
517 }
518 while (goto_node->GetParent() != label_node->GetParent()) {
519 goto_node = goto_node->GetParent();
520 label_node = label_node->GetParent();
521 }
522 ASTNode current = goto_node->GetPrevious();
523 while (current) {
524 if (current == label_node) {
525 return true;
526 }
527 current = current->GetPrevious();
528 }
529 return false;
530}
531
532bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
533 return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second));
534}
535
536bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
537 if (first->GetParent() == second->GetParent()) {
538 return false;
539 }
540 const u32 first_level = first->GetLevel();
541 const u32 second_level = second->GetLevel();
542 u32 min_level;
543 u32 max_level;
544 ASTNode max;
545 ASTNode min;
546 if (first_level > second_level) {
547 min_level = second_level;
548 min = second;
549 max_level = first_level;
550 max = first;
551 } else {
552 min_level = first_level;
553 min = first;
554 max_level = second_level;
555 max = second;
556 }
557
558 while (max_level > min_level) {
559 max_level--;
560 max = max->GetParent();
561 }
562
563 return min->GetParent() == max->GetParent();
564}
565
566void ASTManager::ShowCurrentState(std::string_view state) const {
567 LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
568 SanityCheck();
569}
570
571void ASTManager::SanityCheck() const {
572 for (const auto& label : labels) {
573 if (!label->GetParent()) {
574 LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
575 }
576 }
577}
578
579void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
580 ASTZipper& zipper = goto_node->GetManager();
581 const ASTNode loop_start = label->GetNext();
582 if (loop_start == goto_node) {
583 zipper.Remove(goto_node);
584 return;
585 }
586 const ASTNode parent = label->GetParent();
587 const Expr condition = goto_node->GetGotoCondition();
588 zipper.DetachSegment(loop_start, goto_node);
589 const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition);
590 ASTZipper* sub_zipper = do_while_node->GetSubNodes();
591 sub_zipper->Init(loop_start, do_while_node);
592 zipper.InsertAfter(do_while_node, label);
593 sub_zipper->Remove(goto_node);
594}
595
596void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
597 ASTZipper& zipper = goto_node->GetManager();
598 const ASTNode if_end = label->GetPrevious();
599 if (if_end == goto_node) {
600 zipper.Remove(goto_node);
601 return;
602 }
603 const ASTNode prev = goto_node->GetPrevious();
604 const Expr condition = goto_node->GetGotoCondition();
605 bool do_else = false;
606 if (!disable_else_derivation && prev->IsIfThen()) {
607 const Expr if_condition = prev->GetIfCondition();
608 do_else = ExprAreEqual(if_condition, condition);
609 }
610 const ASTNode parent = label->GetParent();
611 zipper.DetachSegment(goto_node, if_end);
612 ASTNode if_node;
613 if (do_else) {
614 if_node = ASTBase::Make<ASTIfElse>(parent);
615 } else {
616 Expr neg_condition = MakeExprNot(condition);
617 if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
618 }
619 ASTZipper* sub_zipper = if_node->GetSubNodes();
620 sub_zipper->Init(goto_node, if_node);
621 zipper.InsertAfter(if_node, prev);
622 sub_zipper->Remove(goto_node);
623}
624
625void ASTManager::MoveOutward(ASTNode goto_node) {
626 ASTZipper& zipper = goto_node->GetManager();
627 const ASTNode parent = goto_node->GetParent();
628 ASTZipper& zipper2 = parent->GetManager();
629 const ASTNode grandpa = parent->GetParent();
630 const bool is_loop = parent->IsLoop();
631 const bool is_else = parent->IsIfElse();
632 const bool is_if = parent->IsIfThen();
633
634 const ASTNode prev = goto_node->GetPrevious();
635 const ASTNode post = goto_node->GetNext();
636
637 const Expr condition = goto_node->GetGotoCondition();
638 zipper.DetachSingle(goto_node);
639 if (is_loop) {
640 const u32 var_index = NewVariable();
641 const Expr var_condition = MakeExpr<ExprVar>(var_index);
642 const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
643 const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
644 zipper2.InsertBefore(var_node_init, parent);
645 zipper.InsertAfter(var_node, prev);
646 goto_node->SetGotoCondition(var_condition);
647 const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
648 zipper.InsertAfter(break_node, var_node);
649 } else if (is_if || is_else) {
650 const u32 var_index = NewVariable();
651 const Expr var_condition = MakeExpr<ExprVar>(var_index);
652 const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
653 const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
654 if (is_if) {
655 zipper2.InsertBefore(var_node_init, parent);
656 } else {
657 zipper2.InsertBefore(var_node_init, parent->GetPrevious());
658 }
659 zipper.InsertAfter(var_node, prev);
660 goto_node->SetGotoCondition(var_condition);
661 if (post) {
662 zipper.DetachTail(post);
663 const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
664 ASTZipper* sub_zipper = if_node->GetSubNodes();
665 sub_zipper->Init(post, if_node);
666 zipper.InsertAfter(if_node, var_node);
667 }
668 } else {
669 UNREACHABLE();
670 }
671 const ASTNode next = parent->GetNext();
672 if (is_if && next && next->IsIfElse()) {
673 zipper2.InsertAfter(goto_node, next);
674 goto_node->SetParent(grandpa);
675 return;
676 }
677 zipper2.InsertAfter(goto_node, parent);
678 goto_node->SetParent(grandpa);
679}
680
681class ASTClearer {
682public:
683 ASTClearer() = default;
684
685 void operator()(const ASTProgram& ast) {
686 ASTNode current = ast.nodes.GetFirst();
687 while (current) {
688 Visit(current);
689 current = current->GetNext();
690 }
691 }
692
693 void operator()(const ASTIfThen& ast) {
694 ASTNode current = ast.nodes.GetFirst();
695 while (current) {
696 Visit(current);
697 current = current->GetNext();
698 }
699 }
700
701 void operator()(const ASTIfElse& ast) {
702 ASTNode current = ast.nodes.GetFirst();
703 while (current) {
704 Visit(current);
705 current = current->GetNext();
706 }
707 }
708
709 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
710
711 void operator()(ASTBlockDecoded& ast) {
712 ast.nodes.clear();
713 }
714
715 void operator()([[maybe_unused]] const ASTVarSet& ast) {}
716
717 void operator()([[maybe_unused]] const ASTLabel& ast) {}
718
719 void operator()([[maybe_unused]] const ASTGoto& ast) {}
720
721 void operator()(const ASTDoWhile& ast) {
722 ASTNode current = ast.nodes.GetFirst();
723 while (current) {
724 Visit(current);
725 current = current->GetNext();
726 }
727 }
728
729 void operator()([[maybe_unused]] const ASTReturn& ast) {}
730
731 void operator()([[maybe_unused]] const ASTBreak& ast) {}
732
733 void Visit(const ASTNode& node) {
734 std::visit(*this, *node->GetInnerData());
735 node->Clear();
736 }
737};
738
739void ASTManager::Clear() {
740 if (!main_node) {
741 return;
742 }
743 ASTClearer clearer{};
744 clearer.Visit(main_node);
745 main_node.reset();
746 program = nullptr;
747 labels_map.clear();
748 labels.clear();
749 gotos.clear();
750}
751
752} // namespace VideoCommon::Shader
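For reference, these are the two rewrites the deleted Decompile() applied once a goto and its label reached the same nesting level, per the Erosa–Hendren paper cited in the code. A compilable sketch in which p and body are placeholder declarations:

bool p();
void body();

// Backward jump: the label precedes the goto at the same level, so
// EncloseDoWhile turned the [label, goto] segment into a loop.
void BackwardGoto() {
Label_0:
    body();
    if (p()) {
        goto Label_0;
    }
}

void BackwardStructured() {
    do {
        body();
    } while (p());
}

// Forward jump: EncloseIfThen wrapped the skipped segment in an if
// guarded by the negated goto condition.
void ForwardGoto() {
    if (p()) {
        goto Label_1;
    }
    body();
Label_1:;
}

void ForwardStructured() {
    if (!p()) {
        body();
    }
}

Gotos not yet at their label's level were first hoisted by MoveOutward, which rewrites the condition into a fresh variable (ASTVarSet) plus a break or guard, as the deleted implementation above shows.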
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
deleted file mode 100644
index dc49b369e..000000000
--- a/src/video_core/shader/ast.h
+++ /dev/null
@@ -1,398 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8#include <list>
9#include <memory>
10#include <optional>
11#include <string>
12#include <unordered_map>
13#include <vector>
14
15#include "video_core/shader/expr.h"
16#include "video_core/shader/node.h"
17
18namespace VideoCommon::Shader {
19
20class ASTBase;
21class ASTBlockDecoded;
22class ASTBlockEncoded;
23class ASTBreak;
24class ASTDoWhile;
25class ASTGoto;
26class ASTIfElse;
27class ASTIfThen;
28class ASTLabel;
29class ASTProgram;
30class ASTReturn;
31class ASTVarSet;
32
33using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
34 ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
35
36using ASTNode = std::shared_ptr<ASTBase>;
37
38enum class ASTZipperType : u32 {
39 Program,
40 IfThen,
41 IfElse,
42 Loop,
43};
44
45class ASTZipper final {
46public:
47 explicit ASTZipper();
48
49 void Init(ASTNode first, ASTNode parent);
50
51 ASTNode GetFirst() const {
52 return first;
53 }
54
55 ASTNode GetLast() const {
56 return last;
57 }
58
59 void PushBack(ASTNode new_node);
60 void PushFront(ASTNode new_node);
61 void InsertAfter(ASTNode new_node, ASTNode at_node);
62 void InsertBefore(ASTNode new_node, ASTNode at_node);
63 void DetachTail(ASTNode node);
64 void DetachSingle(ASTNode node);
65 void DetachSegment(ASTNode start, ASTNode end);
66 void Remove(ASTNode node);
67
68 ASTNode first;
69 ASTNode last;
70};
71
72class ASTProgram {
73public:
74 ASTZipper nodes{};
75};
76
77class ASTIfThen {
78public:
79 explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
80 Expr condition;
81 ASTZipper nodes{};
82};
83
84class ASTIfElse {
85public:
86 ASTZipper nodes{};
87};
88
89class ASTBlockEncoded {
90public:
91 explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {}
92 u32 start;
93 u32 end;
94};
95
96class ASTBlockDecoded {
97public:
98 explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
99 NodeBlock nodes;
100};
101
102class ASTVarSet {
103public:
104 explicit ASTVarSet(u32 index_, Expr condition_)
105 : index{index_}, condition{std::move(condition_)} {}
106
107 u32 index;
108 Expr condition;
109};
110
111class ASTLabel {
112public:
113 explicit ASTLabel(u32 index_) : index{index_} {}
114 u32 index;
115 bool unused{};
116};
117
118class ASTGoto {
119public:
120 explicit ASTGoto(Expr condition_, u32 label_)
121 : condition{std::move(condition_)}, label{label_} {}
122
123 Expr condition;
124 u32 label;
125};
126
127class ASTDoWhile {
128public:
129 explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
130 Expr condition;
131 ASTZipper nodes{};
132};
133
134class ASTReturn {
135public:
136 explicit ASTReturn(Expr condition_, bool kills_)
137 : condition{std::move(condition_)}, kills{kills_} {}
138
139 Expr condition;
140 bool kills;
141};
142
143class ASTBreak {
144public:
145 explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
146 Expr condition;
147};
148
149class ASTBase {
150public:
151 explicit ASTBase(ASTNode parent_, ASTData data_)
152 : data{std::move(data_)}, parent{std::move(parent_)} {}
153
154 template <class U, class... Args>
155 static ASTNode Make(ASTNode parent, Args&&... args) {
156 return std::make_shared<ASTBase>(std::move(parent),
157 ASTData(U(std::forward<Args>(args)...)));
158 }
159
160 void SetParent(ASTNode new_parent) {
161 parent = std::move(new_parent);
162 }
163
164 ASTNode& GetParent() {
165 return parent;
166 }
167
168 const ASTNode& GetParent() const {
169 return parent;
170 }
171
172 u32 GetLevel() const {
173 u32 level = 0;
174 auto next_parent = parent;
175 while (next_parent) {
176 next_parent = next_parent->GetParent();
177 level++;
178 }
179 return level;
180 }
181
182 ASTData* GetInnerData() {
183 return &data;
184 }
185
186 const ASTData* GetInnerData() const {
187 return &data;
188 }
189
190 ASTNode GetNext() const {
191 return next;
192 }
193
194 ASTNode GetPrevious() const {
195 return previous;
196 }
197
198 ASTZipper& GetManager() {
199 return *manager;
200 }
201
202 const ASTZipper& GetManager() const {
203 return *manager;
204 }
205
206 std::optional<u32> GetGotoLabel() const {
207 if (const auto* inner = std::get_if<ASTGoto>(&data)) {
208 return {inner->label};
209 }
210 return std::nullopt;
211 }
212
213 Expr GetGotoCondition() const {
214 if (const auto* inner = std::get_if<ASTGoto>(&data)) {
215 return inner->condition;
216 }
217 return nullptr;
218 }
219
220 void MarkLabelUnused() {
221 if (auto* inner = std::get_if<ASTLabel>(&data)) {
222 inner->unused = true;
223 }
224 }
225
226 bool IsLabelUnused() const {
227 if (const auto* inner = std::get_if<ASTLabel>(&data)) {
228 return inner->unused;
229 }
230 return true;
231 }
232
233 std::optional<u32> GetLabelIndex() const {
234 if (const auto* inner = std::get_if<ASTLabel>(&data)) {
235 return {inner->index};
236 }
237 return std::nullopt;
238 }
239
240 Expr GetIfCondition() const {
241 if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
242 return inner->condition;
243 }
244 return nullptr;
245 }
246
247 void SetGotoCondition(Expr new_condition) {
248 if (auto* inner = std::get_if<ASTGoto>(&data)) {
249 inner->condition = std::move(new_condition);
250 }
251 }
252
253 bool IsIfThen() const {
254 return std::holds_alternative<ASTIfThen>(data);
255 }
256
257 bool IsIfElse() const {
258 return std::holds_alternative<ASTIfElse>(data);
259 }
260
261 bool IsBlockEncoded() const {
262 return std::holds_alternative<ASTBlockEncoded>(data);
263 }
264
265 void TransformBlockEncoded(NodeBlock&& nodes) {
266 data = ASTBlockDecoded(std::move(nodes));
267 }
268
269 bool IsLoop() const {
270 return std::holds_alternative<ASTDoWhile>(data);
271 }
272
273 ASTZipper* GetSubNodes() {
274 if (std::holds_alternative<ASTProgram>(data)) {
275 return &std::get_if<ASTProgram>(&data)->nodes;
276 }
277 if (std::holds_alternative<ASTIfThen>(data)) {
278 return &std::get_if<ASTIfThen>(&data)->nodes;
279 }
280 if (std::holds_alternative<ASTIfElse>(data)) {
281 return &std::get_if<ASTIfElse>(&data)->nodes;
282 }
283 if (std::holds_alternative<ASTDoWhile>(data)) {
284 return &std::get_if<ASTDoWhile>(&data)->nodes;
285 }
286 return nullptr;
287 }
288
289 void Clear() {
290 next.reset();
291 previous.reset();
292 parent.reset();
293 manager = nullptr;
294 }
295
296private:
297 friend class ASTZipper;
298
299 ASTData data;
300 ASTNode parent;
301 ASTNode next;
302 ASTNode previous;
303 ASTZipper* manager{};
304};
305
306class ASTManager final {
307public:
308 explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
309 ~ASTManager();
310
 311    ASTManager(const ASTManager&) = delete;
 312    ASTManager& operator=(const ASTManager&) = delete;
313
314 ASTManager(ASTManager&& other) noexcept = default;
315 ASTManager& operator=(ASTManager&& other) noexcept = default;
316
317 void Init();
318
319 void DeclareLabel(u32 address);
320
321 void InsertLabel(u32 address);
322
323 void InsertGoto(Expr condition, u32 address);
324
325 void InsertBlock(u32 start_address, u32 end_address);
326
327 void InsertReturn(Expr condition, bool kills);
328
329 std::string Print() const;
330
331 void Decompile();
332
333 void ShowCurrentState(std::string_view state) const;
334
335 void SanityCheck() const;
336
337 void Clear();
338
339 bool IsFullyDecompiled() const {
340 if (full_decompile) {
341 return gotos.empty();
342 }
343
344 for (ASTNode goto_node : gotos) {
345 auto label_index = goto_node->GetGotoLabel();
346 if (!label_index) {
347 return false;
348 }
349 ASTNode glabel = labels[*label_index];
350 if (IsBackwardsJump(goto_node, glabel)) {
351 return false;
352 }
353 }
354 return true;
355 }
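    // That is: a full decompile succeeds only when every goto was removed,
    // while the backwards-only mode tolerates leftover forward gotos and only
    // rejects jumps that go backwards in the program.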
356
357 ASTNode GetProgram() const {
358 return main_node;
359 }
360
361 u32 GetVariables() const {
362 return variables;
363 }
364
365 const std::vector<ASTNode>& GetLabels() const {
366 return labels;
367 }
368
369private:
370 bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
371
372 bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
373
374 bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
375
376 void EncloseDoWhile(ASTNode goto_node, ASTNode label);
377
378 void EncloseIfThen(ASTNode goto_node, ASTNode label);
379
380 void MoveOutward(ASTNode goto_node);
381
382 u32 NewVariable() {
383 return variables++;
384 }
385
386 bool full_decompile{};
387 bool disable_else_derivation{};
388 std::unordered_map<u32, u32> labels_map{};
389 u32 labels_count{};
390 std::vector<ASTNode> labels{};
391 std::list<ASTNode> gotos{};
392 u32 variables{};
393 ASTProgram* program{};
394 ASTNode main_node{};
395 Expr false_condition{};
396};
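// Decompilation note (informal): each goto is repeatedly moved outward via
// MoveOutward until it is directly related to its label, then the remaining
// span is enclosed in an if-then for forward jumps (EncloseIfThen) or a
// do-while for backward jumps (EncloseDoWhile), in the style of structured
// goto-elimination transforms.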
397
398} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
deleted file mode 100644
index 02adcf9c7..000000000
--- a/src/video_core/shader/async_shaders.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <condition_variable>
6#include <mutex>
7#include <thread>
8#include <vector>
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_base.h"
11#include "video_core/renderer_opengl/gl_shader_cache.h"
12#include "video_core/shader/async_shaders.h"
13
14namespace VideoCommon::Shader {
15
16AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}
17
18AsyncShaders::~AsyncShaders() {
19 KillWorkers();
20}
21
22void AsyncShaders::AllocateWorkers() {
23 // Use at least one thread
24 u32 num_workers = 1;
25
26 // Deduce how many more threads we can use
27 const u32 thread_count = std::thread::hardware_concurrency();
28 if (thread_count >= 8) {
 29        // With 8 or more hardware threads, add one worker plus one more for every two beyond 8
30 num_workers += 1 + (thread_count - 8) / 2;
31 }
32
 33    // If the worker count is already what we want, there is nothing to do
34 if (num_workers == worker_threads.size()) {
35 return;
36 }
37
38 // If workers already exist, clear them
39 if (!worker_threads.empty()) {
40 FreeWorkers();
41 }
42
43 // Create workers
44 for (std::size_t i = 0; i < num_workers; i++) {
45 context_list.push_back(emu_window.CreateSharedContext());
46 worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
47 context_list[i].get());
48 }
49}
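// Worked example (illustrative): a 12-thread CPU yields 1 + (1 + (12 - 8) / 2)
// = 4 workers, an 8-thread CPU yields 2, and anything below 8 threads keeps
// the single baseline worker.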
50
51void AsyncShaders::FreeWorkers() {
52 // Mark all threads to quit
53 is_thread_exiting.store(true);
54 cv.notify_all();
55 for (auto& thread : worker_threads) {
56 thread.join();
57 }
58 // Clear our shared contexts
59 context_list.clear();
60
61 // Clear our worker threads
62 worker_threads.clear();
63}
64
65void AsyncShaders::KillWorkers() {
66 is_thread_exiting.store(true);
67 cv.notify_all();
68 for (auto& thread : worker_threads) {
69 thread.detach();
70 }
71 // Clear our shared contexts
72 context_list.clear();
73
74 // Clear our worker threads
75 worker_threads.clear();
76}
77
78bool AsyncShaders::HasWorkQueued() const {
79 return !pending_queue.empty();
80}
81
82bool AsyncShaders::HasCompletedWork() const {
83 std::shared_lock lock{completed_mutex};
84 return !finished_work.empty();
85}
86
87bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
88 const auto& regs = gpu.Maxwell3D().regs;
89
 90    // If depth is in use, we can assume the game is not rendering anything that
 91    // will only be used once.
92 if (regs.zeta_enable) {
93 return true;
94 }
95
 96    // If the game is using a small index count, we can assume these are full-screen quads.
 97    // Usually these shaders are only used once for building textures, so we assume they
 98    // cannot be built asynchronously.
99 if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
100 return false;
101 }
102
103 return true;
104}
105
106std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
107 std::vector<Result> results;
108 {
109 std::unique_lock lock{completed_mutex};
110 results = std::move(finished_work);
111 finished_work.clear();
112 }
113 return results;
114}
115
116void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
117 Tegra::Engines::ShaderType shader_type, u64 uid,
118 std::vector<u64> code, std::vector<u64> code_b,
119 u32 main_offset, CompilerSettings compiler_settings,
120 const Registry& registry, VAddr cpu_addr) {
121 std::unique_lock lock(queue_mutex);
122 pending_queue.push({
123 .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
124 .device = &device,
125 .shader_type = shader_type,
126 .uid = uid,
127 .code = std::move(code),
128 .code_b = std::move(code_b),
129 .main_offset = main_offset,
130 .compiler_settings = compiler_settings,
131 .registry = registry,
132 .cpu_address = cpu_addr,
133 .pp_cache = nullptr,
134 .vk_device = nullptr,
135 .scheduler = nullptr,
136 .descriptor_pool = nullptr,
137 .update_descriptor_queue = nullptr,
138 .bindings{},
139 .program{},
140 .key{},
141 .num_color_buffers = 0,
142 });
143 cv.notify_one();
144}
145
146void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
147 const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
148 Vulkan::VKDescriptorPool& descriptor_pool,
149 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
150 std::vector<VkDescriptorSetLayoutBinding> bindings,
151 Vulkan::SPIRVProgram program,
152 Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
153 std::unique_lock lock(queue_mutex);
154 pending_queue.push({
155 .backend = Backend::Vulkan,
156 .device = nullptr,
157 .shader_type{},
158 .uid = 0,
159 .code{},
160 .code_b{},
161 .main_offset = 0,
162 .compiler_settings{},
163 .registry{},
164 .cpu_address = 0,
165 .pp_cache = pp_cache,
166 .vk_device = &device,
167 .scheduler = &scheduler,
168 .descriptor_pool = &descriptor_pool,
169 .update_descriptor_queue = &update_descriptor_queue,
170 .bindings = std::move(bindings),
171 .program = std::move(program),
172 .key = key,
173 .num_color_buffers = num_color_buffers,
174 });
175 cv.notify_one();
176}
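// Note that each queue routine fills in only its own backend's fields and
// zero-initializes the rest, so a single WorkerParams type can flow through
// one queue; the compiler thread dispatches purely on the .backend tag.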
177
178void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
179 while (!is_thread_exiting.load(std::memory_order_relaxed)) {
180 std::unique_lock lock{queue_mutex};
181 cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
182 if (is_thread_exiting) {
183 return;
184 }
185
 186        // Re-check after waking; the queue may have been drained since the wait predicate ran
187 if (!HasWorkQueued()) {
188 continue;
189 }
 190        // Another thread beat us to the work item; go back to waiting
191 if (pending_queue.empty()) {
192 continue;
193 }
194
195 // Pull work from queue
196 WorkerParams work = std::move(pending_queue.front());
197 pending_queue.pop();
198 lock.unlock();
199
200 if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
201 const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
202 const auto scope = context->Acquire();
203 auto program =
204 OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
205 Result result{};
206 result.backend = work.backend;
207 result.cpu_address = work.cpu_address;
208 result.uid = work.uid;
209 result.code = std::move(work.code);
210 result.code_b = std::move(work.code_b);
211 result.shader_type = work.shader_type;
212
213 if (work.backend == Backend::OpenGL) {
214 result.program.opengl = std::move(program->source_program);
215 } else if (work.backend == Backend::GLASM) {
216 result.program.glasm = std::move(program->assembly_program);
217 }
218
219 {
220 std::unique_lock complete_lock(completed_mutex);
221 finished_work.push_back(std::move(result));
222 }
223 } else if (work.backend == Backend::Vulkan) {
224 auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
225 *work.vk_device, *work.scheduler, *work.descriptor_pool,
226 *work.update_descriptor_queue, work.key, work.bindings, work.program,
227 work.num_color_buffers);
228
229 work.pp_cache->EmplacePipeline(std::move(pipeline));
230 }
231 }
232}
233
234} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
deleted file mode 100644
index 7fdff6e56..000000000
--- a/src/video_core/shader/async_shaders.h
+++ /dev/null
@@ -1,138 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <condition_variable>
8#include <memory>
9#include <shared_mutex>
10#include <thread>
11
12#include <glad/glad.h>
13
14#include "common/common_types.h"
15#include "video_core/renderer_opengl/gl_device.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
17#include "video_core/renderer_opengl/gl_shader_decompiler.h"
18#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
19#include "video_core/renderer_vulkan/vk_scheduler.h"
20#include "video_core/vulkan_common/vulkan_device.h"
21
22namespace Core::Frontend {
23class EmuWindow;
24class GraphicsContext;
25} // namespace Core::Frontend
26
27namespace Tegra {
28class GPU;
29}
30
31namespace Vulkan {
32class VKPipelineCache;
33}
34
35namespace VideoCommon::Shader {
36
37class AsyncShaders {
38public:
39 enum class Backend {
40 OpenGL,
41 GLASM,
42 Vulkan,
43 };
44
45 struct ResultPrograms {
46 OpenGL::OGLProgram opengl;
47 OpenGL::OGLAssemblyProgram glasm;
48 };
49
50 struct Result {
51 u64 uid;
52 VAddr cpu_address;
53 Backend backend;
54 ResultPrograms program;
55 std::vector<u64> code;
56 std::vector<u64> code_b;
57 Tegra::Engines::ShaderType shader_type;
58 };
59
60 explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
61 ~AsyncShaders();
62
63 /// Start up shader worker threads
64 void AllocateWorkers();
65
66 /// Clear the shader queue and kill all worker threads
67 void FreeWorkers();
68
 69    /// Forcibly end all worker threads, detaching them instead of joining
70 void KillWorkers();
71
72 /// Check to see if any shaders have actually been compiled
73 [[nodiscard]] bool HasCompletedWork() const;
74
 75    /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot
 76    /// build every shader async, as some shaders are only built and executed once. We try to
 77    /// "guess" which shaders will be used only once.
78 [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
79
80 /// Pulls completed compiled shaders
81 [[nodiscard]] std::vector<Result> GetCompletedWork();
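    // Consumer sketch (illustrative): a rasterizer would poll once per frame:
    //   for (auto& result : async_shaders.GetCompletedWork()) {
    //       // move result.program.opengl / .glasm into the shader cache
    //   }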
82
83 void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
84 u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
85 CompilerSettings compiler_settings, const Registry& registry,
86 VAddr cpu_addr);
87
88 void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
89 Vulkan::VKScheduler& scheduler,
90 Vulkan::VKDescriptorPool& descriptor_pool,
91 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
92 std::vector<VkDescriptorSetLayoutBinding> bindings,
93 Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
94 u32 num_color_buffers);
95
96private:
97 void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
98
99 /// Check our worker queue to see if we have any work queued already
100 [[nodiscard]] bool HasWorkQueued() const;
101
102 struct WorkerParams {
103 Backend backend;
104 // For OGL
105 const OpenGL::Device* device;
106 Tegra::Engines::ShaderType shader_type;
107 u64 uid;
108 std::vector<u64> code;
109 std::vector<u64> code_b;
110 u32 main_offset;
111 CompilerSettings compiler_settings;
112 std::optional<Registry> registry;
113 VAddr cpu_address;
114
115 // For Vulkan
116 Vulkan::VKPipelineCache* pp_cache;
117 const Vulkan::Device* vk_device;
118 Vulkan::VKScheduler* scheduler;
119 Vulkan::VKDescriptorPool* descriptor_pool;
120 Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
121 std::vector<VkDescriptorSetLayoutBinding> bindings;
122 Vulkan::SPIRVProgram program;
123 Vulkan::GraphicsPipelineCacheKey key;
124 u32 num_color_buffers;
125 };
126
127 std::condition_variable cv;
128 mutable std::mutex queue_mutex;
129 mutable std::shared_mutex completed_mutex;
130 std::atomic<bool> is_thread_exiting{};
131 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
132 std::vector<std::thread> worker_threads;
133 std::queue<WorkerParams> pending_queue;
134 std::vector<Result> finished_work;
135 Core::Frontend::EmuWindow& emu_window;
136};
137
138} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
deleted file mode 100644
index cddcbd4f0..000000000
--- a/src/video_core/shader/compiler_settings.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/shader/compiler_settings.h"
6
7namespace VideoCommon::Shader {
8
9std::string CompileDepthAsString(const CompileDepth cd) {
10 switch (cd) {
11 case CompileDepth::BruteForce:
12 return "Brute Force Compile";
13 case CompileDepth::FlowStack:
14 return "Simple Flow Stack Mode";
15 case CompileDepth::NoFlowStack:
16 return "Remove Flow Stack";
17 case CompileDepth::DecompileBackwards:
18 return "Decompile Backward Jumps";
19 case CompileDepth::FullDecompile:
20 return "Full Decompilation";
21 default:
22 return "Unknown Compiler Process";
23 }
24}
25
26} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
deleted file mode 100644
index 916018c01..000000000
--- a/src/video_core/shader/compiler_settings.h
+++ /dev/null
@@ -1,26 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/engines/shader_bytecode.h"
8
9namespace VideoCommon::Shader {
10
11enum class CompileDepth : u32 {
12 BruteForce = 0,
13 FlowStack = 1,
14 NoFlowStack = 2,
15 DecompileBackwards = 3,
16 FullDecompile = 4,
17};
18
19std::string CompileDepthAsString(CompileDepth cd);
20
21struct CompilerSettings {
22 CompileDepth depth{CompileDepth::NoFlowStack};
23 bool disable_else_derivation{true};
24};
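// Usage sketch (illustrative): request a full decompile while keeping else
// derivation enabled:
//   CompilerSettings settings{.depth = CompileDepth::FullDecompile,
//                             .disable_else_derivation = false};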
25
26} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
deleted file mode 100644
index 43d965f2f..000000000
--- a/src/video_core/shader/control_flow.cpp
+++ /dev/null
@@ -1,751 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <list>
6#include <map>
7#include <set>
8#include <stack>
9#include <unordered_map>
10#include <vector>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/shader/ast.h"
15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/memory_util.h"
17#include "video_core/shader/registry.h"
18#include "video_core/shader/shader_ir.h"
19
20namespace VideoCommon::Shader {
21
22namespace {
23
24using Tegra::Shader::Instruction;
25using Tegra::Shader::OpCode;
26
27constexpr s32 unassigned_branch = -2;
28
29struct Query {
30 u32 address{};
31 std::stack<u32> ssy_stack{};
32 std::stack<u32> pbk_stack{};
33};
34
35struct BlockStack {
36 BlockStack() = default;
37 explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
38 std::stack<u32> ssy_stack{};
39 std::stack<u32> pbk_stack{};
40};
41
42template <typename T, typename... Args>
43BlockBranchInfo MakeBranchInfo(Args&&... args) {
44 static_assert(std::is_convertible_v<T, BranchData>);
45 return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
46}
47
48bool BlockBranchIsIgnored(BlockBranchInfo first) {
49 bool ignore = false;
50 if (std::holds_alternative<SingleBranch>(*first)) {
51 const auto branch = std::get_if<SingleBranch>(first.get());
52 ignore = branch->ignore;
53 }
54 return ignore;
55}
56
57struct BlockInfo {
58 u32 start{};
59 u32 end{};
60 bool visited{};
61 BlockBranchInfo branch{};
62
63 bool IsInside(const u32 address) const {
64 return start <= address && address <= end;
65 }
66};
67
68struct CFGRebuildState {
69 explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
70 : program_code{program_code_}, registry{registry_}, start{start_} {}
71
72 const ProgramCode& program_code;
73 Registry& registry;
74 u32 start{};
75 std::vector<BlockInfo> block_info;
76 std::list<u32> inspect_queries;
77 std::list<Query> queries;
78 std::unordered_map<u32, u32> registered;
79 std::set<u32> labels;
80 std::map<u32, u32> ssy_labels;
81 std::map<u32, u32> pbk_labels;
82 std::unordered_map<u32, BlockStack> stacks;
83 ASTManager* manager{};
84};
85
86enum class BlockCollision : u32 { None, Found, Inside };
87
88std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
89 const auto& blocks = state.block_info;
90 for (u32 index = 0; index < blocks.size(); index++) {
91 if (blocks[index].start == address) {
92 return {BlockCollision::Found, index};
93 }
94 if (blocks[index].IsInside(address)) {
95 return {BlockCollision::Inside, index};
96 }
97 }
98 return {BlockCollision::None, 0xFFFFFFFF};
99}
100
101struct ParseInfo {
102 BlockBranchInfo branch_info{};
103 u32 end_address{};
104};
105
106BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
107 auto& it = state.block_info.emplace_back();
108 it.start = start;
109 it.end = end;
110 const u32 index = static_cast<u32>(state.block_info.size() - 1);
111 state.registered.insert({start, index});
112 return it;
113}
114
115Pred GetPredicate(u32 index, bool negated) {
116 return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
117}
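// Encoding sketch: predicate indices 0-7 name P0-P7 directly, and adding 8
// selects the negated form, e.g. GetPredicate(3, true) yields the Pred value
// for !P3.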
118
119enum class ParseResult : u32 {
120 ControlCaught,
121 BlockEnd,
122 AbnormalFlow,
123};
124
125struct BranchIndirectInfo {
126 u32 buffer{};
127 u32 offset{};
128 u32 entries{};
129 s32 relative_position{};
130};
131
132struct BufferInfo {
133 u32 index;
134 u32 offset;
135};
136
137std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
138 const Instruction instr = state.program_code[pos];
139 const auto opcode = OpCode::Decode(instr);
140 if (opcode->get().GetId() != OpCode::Id::BRX) {
141 return std::nullopt;
142 }
143 if (instr.brx.constant_buffer != 0) {
144 return std::nullopt;
145 }
146 --pos;
147 return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
148}
149
150template <typename Result, typename TestCallable, typename PackCallable>
151// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
152// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
153std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
154 PackCallable pack) {
155 for (; pos >= state.start; --pos) {
156 if (IsSchedInstruction(pos, state.start)) {
157 continue;
158 }
159 const Instruction instr = state.program_code[pos];
160 const auto opcode = OpCode::Decode(instr);
161 if (!opcode) {
162 continue;
163 }
164 if (test(instr, opcode->get())) {
165 --pos;
166 return std::make_optional(pack(instr, opcode->get()));
167 }
168 }
169 return std::nullopt;
170}
171
172std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
173 u64 brx_tracked_register) {
174 return TrackInstruction<std::pair<BufferInfo, u64>>(
175 state, pos,
176 [brx_tracked_register](auto instr, const auto& opcode) {
177 return opcode.GetId() == OpCode::Id::LD_C &&
178 instr.gpr0.Value() == brx_tracked_register &&
179 instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
180 },
181 [](auto instr, const auto& opcode) {
182 const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
183 static_cast<u32>(instr.cbuf36.GetOffset())};
184 return std::make_pair(info, instr.gpr8.Value());
185 });
186}
187
188std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
189 u64 ldc_tracked_register) {
190 return TrackInstruction<u64>(
191 state, pos,
192 [ldc_tracked_register](auto instr, const auto& opcode) {
193 return opcode.GetId() == OpCode::Id::SHL_IMM &&
194 instr.gpr0.Value() == ldc_tracked_register;
195 },
196 [](auto instr, const auto&) { return instr.gpr8.Value(); });
197}
198
199std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
200 u64 shl_tracked_register) {
201 return TrackInstruction<u32>(
202 state, pos,
203 [shl_tracked_register](auto instr, const auto& opcode) {
204 return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
205 instr.gpr0.Value() == shl_tracked_register;
206 },
207 [](auto instr, const auto&) {
208 return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
209 });
210}
211
212std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
213 const auto brx_info = GetBRXInfo(state, pos);
214 if (!brx_info) {
215 return std::nullopt;
216 }
217 const auto [relative_position, brx_tracked_register] = *brx_info;
218
219 const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
220 if (!ldc_info) {
221 return std::nullopt;
222 }
223 const auto [buffer_info, ldc_tracked_register] = *ldc_info;
224
225 const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
226 if (!shl_tracked_register) {
227 return std::nullopt;
228 }
229
230 const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
231 if (!entries) {
232 return std::nullopt;
233 }
234
235 return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
236}
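// Taken together, the helpers above walk backwards from the BRX and match the
// usual jump-table idiom, shown here schematically (register names are
// illustrative; only the def-use chain is actually tracked):
//   IMNMX.IMM Rclamp, Ridx, #entries-1   ; clamp the case index
//   SHL.IMM   Rscale, Rclamp, #shift     ; scale it into a table offset
//   LD_C      Rtgt, c[buffer][Rscale]    ; load the branch target
//   BRX       Rtgt                       ; indirect branch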
237
238std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
239 u32 offset = static_cast<u32>(address);
240 const u32 end_address = static_cast<u32>(state.program_code.size());
241 ParseInfo parse_info{};
242 SingleBranch single_branch{};
243
244 const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) {
245 const auto pair = rebuild_state.labels.emplace(label_address);
246 if (pair.second) {
247 rebuild_state.inspect_queries.push_back(label_address);
248 }
249 };
250
251 while (true) {
252 if (offset >= end_address) {
253 // ASSERT_OR_EXECUTE can't be used, as it ignores the break
254 ASSERT_MSG(false, "Shader passed the current limit!");
255
256 single_branch.address = exit_branch;
257 single_branch.ignore = false;
258 break;
259 }
260 if (state.registered.contains(offset)) {
261 single_branch.address = offset;
262 single_branch.ignore = true;
263 break;
264 }
265 if (IsSchedInstruction(offset, state.start)) {
266 offset++;
267 continue;
268 }
269 const Instruction instr = {state.program_code[offset]};
270 const auto opcode = OpCode::Decode(instr);
271 if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
272 offset++;
273 continue;
274 }
275
276 switch (opcode->get().GetId()) {
277 case OpCode::Id::EXIT: {
278 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
279 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
280 if (single_branch.condition.predicate == Pred::NeverExecute) {
281 offset++;
282 continue;
283 }
284 const ConditionCode cc = instr.flow_condition_code;
285 single_branch.condition.cc = cc;
286 if (cc == ConditionCode::F) {
287 offset++;
288 continue;
289 }
290 single_branch.address = exit_branch;
291 single_branch.kill = false;
292 single_branch.is_sync = false;
293 single_branch.is_brk = false;
294 single_branch.ignore = false;
295 parse_info.end_address = offset;
296 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
297 single_branch.condition, single_branch.address, single_branch.kill,
298 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
299
300 return {ParseResult::ControlCaught, parse_info};
301 }
302 case OpCode::Id::BRA: {
303 if (instr.bra.constant_buffer != 0) {
304 return {ParseResult::AbnormalFlow, parse_info};
305 }
306 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
307 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
308 if (single_branch.condition.predicate == Pred::NeverExecute) {
309 offset++;
310 continue;
311 }
312 const ConditionCode cc = instr.flow_condition_code;
313 single_branch.condition.cc = cc;
314 if (cc == ConditionCode::F) {
315 offset++;
316 continue;
317 }
318 const u32 branch_offset = offset + instr.bra.GetBranchTarget();
319 if (branch_offset == 0) {
320 single_branch.address = exit_branch;
321 } else {
322 single_branch.address = branch_offset;
323 }
324 insert_label(state, branch_offset);
325 single_branch.kill = false;
326 single_branch.is_sync = false;
327 single_branch.is_brk = false;
328 single_branch.ignore = false;
329 parse_info.end_address = offset;
330 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
331 single_branch.condition, single_branch.address, single_branch.kill,
332 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
333
334 return {ParseResult::ControlCaught, parse_info};
335 }
336 case OpCode::Id::SYNC: {
337 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
338 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
339 if (single_branch.condition.predicate == Pred::NeverExecute) {
340 offset++;
341 continue;
342 }
343 const ConditionCode cc = instr.flow_condition_code;
344 single_branch.condition.cc = cc;
345 if (cc == ConditionCode::F) {
346 offset++;
347 continue;
348 }
349 single_branch.address = unassigned_branch;
350 single_branch.kill = false;
351 single_branch.is_sync = true;
352 single_branch.is_brk = false;
353 single_branch.ignore = false;
354 parse_info.end_address = offset;
355 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
356 single_branch.condition, single_branch.address, single_branch.kill,
357 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
358
359 return {ParseResult::ControlCaught, parse_info};
360 }
361 case OpCode::Id::BRK: {
362 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
363 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
364 if (single_branch.condition.predicate == Pred::NeverExecute) {
365 offset++;
366 continue;
367 }
368 const ConditionCode cc = instr.flow_condition_code;
369 single_branch.condition.cc = cc;
370 if (cc == ConditionCode::F) {
371 offset++;
372 continue;
373 }
374 single_branch.address = unassigned_branch;
375 single_branch.kill = false;
376 single_branch.is_sync = false;
377 single_branch.is_brk = true;
378 single_branch.ignore = false;
379 parse_info.end_address = offset;
380 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
381 single_branch.condition, single_branch.address, single_branch.kill,
382 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
383
384 return {ParseResult::ControlCaught, parse_info};
385 }
386 case OpCode::Id::KIL: {
387 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
388 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
389 if (single_branch.condition.predicate == Pred::NeverExecute) {
390 offset++;
391 continue;
392 }
393 const ConditionCode cc = instr.flow_condition_code;
394 single_branch.condition.cc = cc;
395 if (cc == ConditionCode::F) {
396 offset++;
397 continue;
398 }
399 single_branch.address = exit_branch;
400 single_branch.kill = true;
401 single_branch.is_sync = false;
402 single_branch.is_brk = false;
403 single_branch.ignore = false;
404 parse_info.end_address = offset;
405 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
406 single_branch.condition, single_branch.address, single_branch.kill,
407 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
408
409 return {ParseResult::ControlCaught, parse_info};
410 }
411 case OpCode::Id::SSY: {
412 const u32 target = offset + instr.bra.GetBranchTarget();
413 insert_label(state, target);
414 state.ssy_labels.emplace(offset, target);
415 break;
416 }
417 case OpCode::Id::PBK: {
418 const u32 target = offset + instr.bra.GetBranchTarget();
419 insert_label(state, target);
420 state.pbk_labels.emplace(offset, target);
421 break;
422 }
423 case OpCode::Id::BRX: {
424 const auto tmp = TrackBranchIndirectInfo(state, offset);
425 if (!tmp) {
 426            LOG_WARNING(HW_GPU, "BRX Track Unsuccessful");
427 return {ParseResult::AbnormalFlow, parse_info};
428 }
429
430 const auto result = *tmp;
431 const s32 pc_target = offset + result.relative_position;
432 std::vector<CaseBranch> branches;
433 for (u32 i = 0; i < result.entries; i++) {
434 auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
435 if (!key) {
436 return {ParseResult::AbnormalFlow, parse_info};
437 }
438 u32 value = *key;
439 u32 target = static_cast<u32>((value >> 3) + pc_target);
440 insert_label(state, target);
441 branches.emplace_back(value, target);
442 }
443 parse_info.end_address = offset;
444 parse_info.branch_info = MakeBranchInfo<MultiBranch>(
445 static_cast<u32>(instr.gpr8.Value()), std::move(branches));
446
447 return {ParseResult::ControlCaught, parse_info};
448 }
449 default:
450 break;
451 }
452
453 offset++;
454 }
455 single_branch.kill = false;
456 single_branch.is_sync = false;
457 single_branch.is_brk = false;
458 parse_info.end_address = offset - 1;
459 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
460 single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
461 single_branch.is_brk, single_branch.ignore);
462 return {ParseResult::BlockEnd, parse_info};
463}
464
465bool TryInspectAddress(CFGRebuildState& state) {
466 if (state.inspect_queries.empty()) {
467 return false;
468 }
469
470 const u32 address = state.inspect_queries.front();
471 state.inspect_queries.pop_front();
472 const auto [result, block_index] = TryGetBlock(state, address);
473 switch (result) {
474 case BlockCollision::Found: {
475 return true;
476 }
477 case BlockCollision::Inside: {
 478        // This case is the tricky one:
 479        // We need to split the block into two separate blocks
480 const u32 end = state.block_info[block_index].end;
481 BlockInfo& new_block = CreateBlockInfo(state, address, end);
482 BlockInfo& current_block = state.block_info[block_index];
483 current_block.end = address - 1;
484 new_block.branch = std::move(current_block.branch);
485 BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
486 const auto branch = std::get_if<SingleBranch>(forward_branch.get());
487 branch->address = address;
488 branch->ignore = true;
489 current_block.branch = std::move(forward_branch);
490 return true;
491 }
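    // Split sketch: a query landing inside [start, end] yields
    //   [start, address - 1] --fallthrough--> [address, end]
    // with the original branch handed to the second half.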
492 default:
493 break;
494 }
495 const auto [parse_result, parse_info] = ParseCode(state, address);
496 if (parse_result == ParseResult::AbnormalFlow) {
 497        // On AbnormalFlow, return false to abort the CFG reconstruction
498 return false;
499 }
500
501 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
502 block_info.branch = parse_info.branch_info;
503 if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
504 const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
505 if (branch->condition.IsUnconditional()) {
506 return true;
507 }
508 const u32 fallthrough_address = parse_info.end_address + 1;
509 state.inspect_queries.push_front(fallthrough_address);
510 return true;
511 }
512 return true;
513}
514
515bool TryQuery(CFGRebuildState& state) {
516 const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
517 BlockInfo& block) {
518 auto gather_start = labels.lower_bound(block.start);
519 const auto gather_end = labels.upper_bound(block.end);
520 while (gather_start != gather_end) {
521 cc.push(gather_start->second);
522 ++gather_start;
523 }
524 };
525 if (state.queries.empty()) {
526 return false;
527 }
528
529 Query& q = state.queries.front();
530 const u32 block_index = state.registered[q.address];
531 BlockInfo& block = state.block_info[block_index];
 532    // If the block was already visited, check that the stacks match. Otherwise, gather the
 533    // ssy/pbk labels into the current stack and check whether the branch at the end of the
 534    // block consumes a label. Schedule new queries accordingly.
535 if (block.visited) {
536 BlockStack& stack = state.stacks[q.address];
537 const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
538 (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
539 state.queries.pop_front();
540 return all_okay;
541 }
542 block.visited = true;
543 state.stacks.insert_or_assign(q.address, BlockStack{q});
544
545 Query q2(q);
546 state.queries.pop_front();
547 gather_labels(q2.ssy_stack, state.ssy_labels, block);
548 gather_labels(q2.pbk_stack, state.pbk_labels, block);
549 if (std::holds_alternative<SingleBranch>(*block.branch)) {
550 auto* branch = std::get_if<SingleBranch>(block.branch.get());
551 if (!branch->condition.IsUnconditional()) {
552 q2.address = block.end + 1;
553 state.queries.push_back(q2);
554 }
555
556 auto& conditional_query = state.queries.emplace_back(q2);
557 if (branch->is_sync) {
558 if (branch->address == unassigned_branch) {
559 branch->address = conditional_query.ssy_stack.top();
560 }
561 conditional_query.ssy_stack.pop();
562 }
563 if (branch->is_brk) {
564 if (branch->address == unassigned_branch) {
565 branch->address = conditional_query.pbk_stack.top();
566 }
567 conditional_query.pbk_stack.pop();
568 }
569 conditional_query.address = branch->address;
570 return true;
571 }
572
573 const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
574 for (const auto& branch_case : multi_branch->branches) {
575 auto& conditional_query = state.queries.emplace_back(q2);
576 conditional_query.address = branch_case.address;
577 }
578
579 return true;
580}
581
582void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
583 const auto get_expr = [](const Condition& cond) -> Expr {
584 Expr result;
585 if (cond.cc != ConditionCode::T) {
586 result = MakeExpr<ExprCondCode>(cond.cc);
587 }
588 if (cond.predicate != Pred::UnusedIndex) {
589 u32 pred = static_cast<u32>(cond.predicate);
590 bool negate = false;
591 if (pred > 7) {
592 negate = true;
593 pred -= 8;
594 }
595 Expr extra = MakeExpr<ExprPredicate>(pred);
596 if (negate) {
597 extra = MakeExpr<ExprNot>(std::move(extra));
598 }
599 if (result) {
600 return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
601 }
602 return extra;
603 }
604 if (result) {
605 return result;
606 }
607 return MakeExpr<ExprBoolean>(true);
608 };
609
610 if (std::holds_alternative<SingleBranch>(*branch_info)) {
611 const auto* branch = std::get_if<SingleBranch>(branch_info.get());
612 if (branch->address < 0) {
613 if (branch->kill) {
614 mm.InsertReturn(get_expr(branch->condition), true);
615 return;
616 }
617 mm.InsertReturn(get_expr(branch->condition), false);
618 return;
619 }
620 mm.InsertGoto(get_expr(branch->condition), branch->address);
621 return;
622 }
623 const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
624 for (const auto& branch_case : multi_branch->branches) {
625 mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
626 branch_case.address);
627 }
628}
629
630void DecompileShader(CFGRebuildState& state) {
631 state.manager->Init();
632 for (auto label : state.labels) {
633 state.manager->DeclareLabel(label);
634 }
635 for (const auto& block : state.block_info) {
636 if (state.labels.contains(block.start)) {
637 state.manager->InsertLabel(block.start);
638 }
639 const bool ignore = BlockBranchIsIgnored(block.branch);
640 const u32 end = ignore ? block.end + 1 : block.end;
641 state.manager->InsertBlock(block.start, end);
642 if (!ignore) {
643 InsertBranch(*state.manager, block.branch);
644 }
645 }
646 state.manager->Decompile();
647}
648
649} // Anonymous namespace
650
651std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
652 const CompilerSettings& settings,
653 Registry& registry) {
654 auto result_out = std::make_unique<ShaderCharacteristics>();
655 if (settings.depth == CompileDepth::BruteForce) {
656 result_out->settings.depth = CompileDepth::BruteForce;
657 return result_out;
658 }
659
660 CFGRebuildState state{program_code, start_address, registry};
661 // Inspect Code and generate blocks
662 state.labels.clear();
663 state.labels.emplace(start_address);
664 state.inspect_queries.push_back(state.start);
665 while (!state.inspect_queries.empty()) {
666 if (!TryInspectAddress(state)) {
667 result_out->settings.depth = CompileDepth::BruteForce;
668 return result_out;
669 }
670 }
671
672 bool use_flow_stack = true;
673
674 bool decompiled = false;
675
676 if (settings.depth != CompileDepth::FlowStack) {
677 // Decompile Stacks
678 state.queries.push_back(Query{state.start, {}, {}});
679 decompiled = true;
680 while (!state.queries.empty()) {
681 if (!TryQuery(state)) {
682 decompiled = false;
683 break;
684 }
685 }
686 }
687
688 use_flow_stack = !decompiled;
689
690 // Sort and organize results
691 std::sort(state.block_info.begin(), state.block_info.end(),
692 [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
693 if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
694 ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
695 settings.disable_else_derivation};
696 state.manager = &manager;
697 DecompileShader(state);
698 decompiled = state.manager->IsFullyDecompiled();
699 if (!decompiled) {
700 if (settings.depth == CompileDepth::FullDecompile) {
701 LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
702 } else {
703 LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
704 }
705 state.manager->ShowCurrentState("Of Shader");
706 state.manager->Clear();
707 } else {
708 auto characteristics = std::make_unique<ShaderCharacteristics>();
709 characteristics->start = start_address;
710 characteristics->settings.depth = settings.depth;
711 characteristics->manager = std::move(manager);
712 characteristics->end = state.block_info.back().end + 1;
713 return characteristics;
714 }
715 }
716
717 result_out->start = start_address;
718 result_out->settings.depth =
719 use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
720 result_out->blocks.clear();
721 for (auto& block : state.block_info) {
722 ShaderBlock new_block{};
723 new_block.start = block.start;
724 new_block.end = block.end;
725 new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
726 if (!new_block.ignore_branch) {
727 new_block.branch = block.branch;
728 }
729 result_out->end = std::max(result_out->end, block.end);
730 result_out->blocks.push_back(new_block);
731 }
732 if (!use_flow_stack) {
733 result_out->labels = std::move(state.labels);
734 return result_out;
735 }
736
737 auto back = result_out->blocks.begin();
738 auto next = std::next(back);
739 while (next != result_out->blocks.end()) {
740 if (!state.labels.contains(next->start) && next->start == back->end + 1) {
741 back->end = next->end;
742 next = result_out->blocks.erase(next);
743 continue;
744 }
745 back = next;
746 ++next;
747 }
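    // The loop above coalesces fallthrough-adjacent blocks: e.g. [0, 7] and
    // [8, 15] merge into [0, 15] unless address 8 is a label some branch can
    // still target.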
748
749 return result_out;
750}
751} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
deleted file mode 100644
index 37bf96492..000000000
--- a/src/video_core/shader/control_flow.h
+++ /dev/null
@@ -1,117 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <list>
8#include <optional>
9#include <set>
10#include <variant>
11
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/ast.h"
14#include "video_core/shader/compiler_settings.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::ConditionCode;
21using Tegra::Shader::Pred;
22
23constexpr s32 exit_branch = -1;
24
25struct Condition {
26 Pred predicate{Pred::UnusedIndex};
27 ConditionCode cc{ConditionCode::T};
28
29 bool IsUnconditional() const {
30 return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
31 }
32
33 bool operator==(const Condition& other) const {
34 return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
35 }
36
37 bool operator!=(const Condition& other) const {
38 return !operator==(other);
39 }
40};
41
42class SingleBranch {
43public:
44 SingleBranch() = default;
45 explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_,
46 bool is_brk_, bool ignore_)
47 : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_},
48 ignore{ignore_} {}
49
50 bool operator==(const SingleBranch& b) const {
51 return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
52 std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
53 }
54
55 bool operator!=(const SingleBranch& b) const {
56 return !operator==(b);
57 }
58
59 Condition condition{};
60 s32 address{exit_branch};
61 bool kill{};
62 bool is_sync{};
63 bool is_brk{};
64 bool ignore{};
65};
66
67struct CaseBranch {
68 explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {}
69 u32 cmp_value;
70 u32 address;
71};
72
73class MultiBranch {
74public:
75 explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_)
76 : gpr{gpr_}, branches{std::move(branches_)} {}
77
78 u32 gpr{};
79 std::vector<CaseBranch> branches{};
80};
81
82using BranchData = std::variant<SingleBranch, MultiBranch>;
83using BlockBranchInfo = std::shared_ptr<BranchData>;
84
85bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
86
87struct ShaderBlock {
88 u32 start{};
89 u32 end{};
90 bool ignore_branch{};
91 BlockBranchInfo branch{};
92
93 bool operator==(const ShaderBlock& sb) const {
94 return std::tie(start, end, ignore_branch) ==
95 std::tie(sb.start, sb.end, sb.ignore_branch) &&
96 BlockBranchInfoAreEqual(branch, sb.branch);
97 }
98
99 bool operator!=(const ShaderBlock& sb) const {
100 return !operator==(sb);
101 }
102};
103
104struct ShaderCharacteristics {
105 std::list<ShaderBlock> blocks{};
106 std::set<u32> labels{};
107 u32 start{};
108 u32 end{};
109 ASTManager manager{true, true};
110 CompilerSettings settings{};
111};
112
113std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
114 const CompilerSettings& settings,
115 Registry& registry);
116
117} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
deleted file mode 100644
index 6576d1208..000000000
--- a/src/video_core/shader/decode.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <limits>
7#include <set>
8
9#include <fmt/format.h>
10
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/engines/shader_header.h"
15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/memory_util.h"
17#include "video_core/shader/node_helper.h"
18#include "video_core/shader/shader_ir.h"
19
20namespace VideoCommon::Shader {
21
22using Tegra::Shader::Instruction;
23using Tegra::Shader::OpCode;
24
25namespace {
26
27void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
28 const std::list<SamplerEntry>& used_samplers) {
29 if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
30 return;
31 }
32 u32 count{};
33 std::vector<u32> bound_offsets;
34 for (const auto& sampler : used_samplers) {
35 if (sampler.is_bindless) {
36 continue;
37 }
38 ++count;
39 bound_offsets.emplace_back(sampler.offset);
40 }
41 if (count > 1) {
42 gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
43 }
44}
45
46std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
47 VideoCore::GuestDriverProfile& gpu_driver,
48 const std::list<SamplerEntry>& used_samplers) {
49 const u32 base_offset = sampler_to_deduce.offset;
50 u32 max_offset{std::numeric_limits<u32>::max()};
51 for (const auto& sampler : used_samplers) {
52 if (sampler.is_bindless) {
53 continue;
54 }
55 if (sampler.offset > base_offset) {
56 max_offset = std::min(sampler.offset, max_offset);
57 }
58 }
59 if (max_offset == std::numeric_limits<u32>::max()) {
60 return std::nullopt;
61 }
62 return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
63}
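// Worked example (illustrative, assuming a 4-byte texture handler): with
// bound samplers at offsets 8 and 16, an indexed sampler based at offset 8 is
// deduced to span ((16 - 8) * 4) / 4 = 8 entries.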
64
65} // Anonymous namespace
66
67class ASTDecoder {
68public:
69 explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
70
71 void operator()(ASTProgram& ast) {
72 ASTNode current = ast.nodes.GetFirst();
73 while (current) {
74 Visit(current);
75 current = current->GetNext();
76 }
77 }
78
79 void operator()(ASTIfThen& ast) {
80 ASTNode current = ast.nodes.GetFirst();
81 while (current) {
82 Visit(current);
83 current = current->GetNext();
84 }
85 }
86
87 void operator()(ASTIfElse& ast) {
88 ASTNode current = ast.nodes.GetFirst();
89 while (current) {
90 Visit(current);
91 current = current->GetNext();
92 }
93 }
94
95 void operator()(ASTBlockEncoded& ast) {}
96
97 void operator()(ASTBlockDecoded& ast) {}
98
99 void operator()(ASTVarSet& ast) {}
100
101 void operator()(ASTLabel& ast) {}
102
103 void operator()(ASTGoto& ast) {}
104
105 void operator()(ASTDoWhile& ast) {
106 ASTNode current = ast.nodes.GetFirst();
107 while (current) {
108 Visit(current);
109 current = current->GetNext();
110 }
111 }
112
113 void operator()(ASTReturn& ast) {}
114
115 void operator()(ASTBreak& ast) {}
116
117 void Visit(ASTNode& node) {
118 std::visit(*this, *node->GetInnerData());
119 if (node->IsBlockEncoded()) {
120 auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
121 NodeBlock bb = ir.DecodeRange(block->start, block->end);
122 node->TransformBlockEncoded(std::move(bb));
123 }
124 }
125
126private:
127 ShaderIR& ir;
128};
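// ASTDecoder is a std::visit visitor over ASTData: container nodes recurse
// through their zipper children, leaf nodes are no-ops, and Visit() lowers
// any ASTBlockEncoded it encounters into decoded IR in place.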
129
130void ShaderIR::Decode() {
131 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
132
133 decompiled = false;
134 auto info = ScanFlow(program_code, main_offset, settings, registry);
135 auto& shader_info = *info;
136 coverage_begin = shader_info.start;
137 coverage_end = shader_info.end;
138 switch (shader_info.settings.depth) {
139 case CompileDepth::FlowStack: {
140 for (const auto& block : shader_info.blocks) {
141 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
142 }
143 break;
144 }
145 case CompileDepth::NoFlowStack: {
146 disable_flow_stack = true;
147 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
148 if (label == static_cast<u32>(exit_branch)) {
149 return;
150 }
151 basic_blocks.insert({label, nodes});
152 };
153 const auto& blocks = shader_info.blocks;
154 NodeBlock current_block;
155 u32 current_label = static_cast<u32>(exit_branch);
156 for (const auto& block : blocks) {
157 if (shader_info.labels.contains(block.start)) {
158 insert_block(current_block, current_label);
159 current_block.clear();
160 current_label = block.start;
161 }
162 if (!block.ignore_branch) {
163 DecodeRangeInner(current_block, block.start, block.end);
164 InsertControlFlow(current_block, block);
165 } else {
166 DecodeRangeInner(current_block, block.start, block.end + 1);
167 }
168 }
169 insert_block(current_block, current_label);
170 break;
171 }
172 case CompileDepth::DecompileBackwards:
173 case CompileDepth::FullDecompile: {
174 program_manager = std::move(shader_info.manager);
175 disable_flow_stack = true;
176 decompiled = true;
177 ASTDecoder decoder{*this};
178 ASTNode program = GetASTProgram();
179 decoder.Visit(program);
180 break;
181 }
182 default:
183 LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
184 [[fallthrough]];
185 case CompileDepth::BruteForce: {
186 const auto shader_end = static_cast<u32>(program_code.size());
187 coverage_begin = main_offset;
188 coverage_end = shader_end;
189 for (u32 label = main_offset; label < shader_end; ++label) {
190 basic_blocks.insert({label, DecodeRange(label, label + 1)});
191 }
192 break;
193 }
194 }
195 if (settings.depth != shader_info.settings.depth) {
196 LOG_WARNING(
 197            HW_GPU, "Decompiling with setting \"{}\" failed, downgrading to setting \"{}\"",
198 CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
199 }
200}
201
202NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
203 NodeBlock basic_block;
204 DecodeRangeInner(basic_block, begin, end);
205 return basic_block;
206}
207
208void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
209 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
210 pc = DecodeInstr(bb, pc);
211 }
212}
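// Note: when begin > end, the loop bound falls back to MAX_PROGRAM_LENGTH, so
// an inverted range presumably decodes forward to the program limit instead
// of decoding nothing.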
213
214void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
215 const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
216 Node result = n;
217 if (cond.cc != ConditionCode::T) {
218 result = Conditional(GetConditionCode(cond.cc), {result});
219 }
220 if (cond.predicate != Pred::UnusedIndex) {
221 u32 pred = static_cast<u32>(cond.predicate);
222 const bool is_neg = pred > 7;
223 if (is_neg) {
224 pred -= 8;
225 }
226 result = Conditional(GetPredicate(pred, is_neg), {result});
227 }
228 return result;
229 };
230 if (std::holds_alternative<SingleBranch>(*block.branch)) {
231 auto branch = std::get_if<SingleBranch>(block.branch.get());
232 if (branch->address < 0) {
233 if (branch->kill) {
234 Node n = Operation(OperationCode::Discard);
235 n = apply_conditions(branch->condition, n);
236 bb.push_back(n);
237 global_code.push_back(n);
238 return;
239 }
240 Node n = Operation(OperationCode::Exit);
241 n = apply_conditions(branch->condition, n);
242 bb.push_back(n);
243 global_code.push_back(n);
244 return;
245 }
246 Node n = Operation(OperationCode::Branch, Immediate(branch->address));
247 n = apply_conditions(branch->condition, n);
248 bb.push_back(n);
249 global_code.push_back(n);
250 return;
251 }
252 auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
253 Node op_a = GetRegister(multi_branch->gpr);
254 for (auto& branch_case : multi_branch->branches) {
255 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
256 Node op_b = Immediate(branch_case.cmp_value);
257 Node condition =
258 GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
259 auto result = Conditional(condition, {n});
260 bb.push_back(result);
261 global_code.push_back(result);
262 }
263}
264
265u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
266 // Ignore sched instructions when generating code.
267 if (IsSchedInstruction(pc, main_offset)) {
268 return pc + 1;
269 }
270
271 const Instruction instr = {program_code[pc]};
272 const auto opcode = OpCode::Decode(instr);
273 const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
274
275 // Decoding failure
276 if (!opcode) {
277 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
278 bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
279 nv_address, instr.value)));
280 return pc + 1;
281 }
282
283 bb.push_back(Comment(
284 fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
285
286 using Tegra::Shader::Pred;
287 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
288 "NeverExecute predicate not implemented");
289
290 static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = {
291 {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
292 {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
293 {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
294 {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
295 {OpCode::Type::Shift, &ShaderIR::DecodeShift},
296 {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
297 {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
298 {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
299 {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
300 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
301 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
302 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
303 {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
304 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
305 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
306 {OpCode::Type::Image, &ShaderIR::DecodeImage},
307 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
308 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
309 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
310 {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
311 {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
312 {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
313 {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
314 {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
315 {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
316 {OpCode::Type::Video, &ShaderIR::DecodeVideo},
317 {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
318 };
319
320 std::vector<Node> tmp_block;
321 if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
322 pc = (this->*decoder->second)(tmp_block, pc);
323 } else {
324 pc = DecodeOther(tmp_block, pc);
325 }
326
 327    // Some instructions (like SSY) don't have a predicate field; they are always executed
 328    // unconditionally.
329 const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
330 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
331
332 if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
333 const Node conditional =
334 Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block));
335 global_code.push_back(conditional);
336 bb.push_back(conditional);
337 } else {
338 for (auto& node : tmp_block) {
339 global_code.push_back(node);
340 bb.push_back(node);
341 }
342 }
343
344 return pc + 1;
345}
346
347void ShaderIR::PostDecode() {
348 // Deduce texture handler size if needed
349 auto gpu_driver = registry.AccessGuestDriverProfile();
350 DeduceTextureHandlerSize(gpu_driver, used_samplers);
 351    // Deduce indexed sampler sizes
352 if (!uses_indexed_samplers) {
353 return;
354 }
355 for (auto& sampler : used_samplers) {
356 if (!sampler.is_indexed) {
357 continue;
358 }
359 if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
360 sampler.size = *size;
361 } else {
362 LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
363 sampler.size = 1;
364 }
365 }
366}
367
368} // namespace VideoCommon::Shader
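
The decoders table above dispatches through pointers to member functions keyed by opcode type. A minimal standalone sketch of that pattern (hypothetical Decoder/DecodeA/DecodeB names, not from the codebase):

#include <map>

struct Decoder {
    int DecodeA(int pc) { return pc; }
    int DecodeB(int pc) { return pc; }

    int Dispatch(int type, int pc) {
        // Map a type tag to a pointer-to-member-function, mirroring 'decoders'.
        static const std::map<int, int (Decoder::*)(int)> table = {
            {0, &Decoder::DecodeA},
            {1, &Decoder::DecodeB},
        };
        if (const auto it = table.find(type); it != table.end()) {
            return (this->*it->second)(pc); // invoke through the member pointer
        }
        return pc; // fall back, as DecodeOther does above
    }
};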
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
deleted file mode 100644
index 15eb700e7..000000000
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::SubOp;
17
18u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 Node op_a = GetRegister(instr.gpr8);
23
24 Node op_b = [&] {
25 if (instr.is_b_imm) {
26 return GetImmediate19(instr);
27 } else if (instr.is_b_gpr) {
28 return GetRegister(instr.gpr20);
29 } else {
30 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
31 }
32 }();
33
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::MOV_C:
36 case OpCode::Id::MOV_R: {
37 // MOV has neither 'abs' nor 'neg' bits.
38 SetRegister(bb, instr.gpr0, op_b);
39 break;
40 }
41 case OpCode::Id::FMUL_C:
42 case OpCode::Id::FMUL_R:
43 case OpCode::Id::FMUL_IMM: {
44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
45 if (instr.fmul.tab5cb8_2 != 0) {
46 LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
47 instr.fmul.tab5cb8_2.Value());
48 }
49 if (instr.fmul.tab5c68_0 != 1) {
50 LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented",
51 instr.fmul.tab5c68_0.Value());
52 }
53
54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
55
56 static constexpr std::array FmulPostFactor = {
57 1.000f, // None
58 0.500f, // Divide 2
59 0.250f, // Divide 4
60 0.125f, // Divide 8
61 8.000f, // Mul 8
62 4.000f, // Mul 4
63 2.000f, // Mul 2
64 };
65
66 if (instr.fmul.postfactor != 0) {
67 op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
68 Immediate(FmulPostFactor[instr.fmul.postfactor]));
69 }
70
71 // TODO(Rodrigo): Should precise be used when there's a postfactor?
72 Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
73
74 value = GetSaturatedFloat(value, instr.alu.saturate_d);
75
76 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
77 SetRegister(bb, instr.gpr0, value);
78 break;
79 }
80 case OpCode::Id::FADD_C:
81 case OpCode::Id::FADD_R:
82 case OpCode::Id::FADD_IMM: {
83 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
84 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
85
86 Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
87 value = GetSaturatedFloat(value, instr.alu.saturate_d);
88
89 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
90 SetRegister(bb, instr.gpr0, value);
91 break;
92 }
93 case OpCode::Id::MUFU: {
94 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
95
96 Node value = [&]() {
97 switch (instr.sub_op) {
98 case SubOp::Cos:
99 return Operation(OperationCode::FCos, PRECISE, op_a);
100 case SubOp::Sin:
101 return Operation(OperationCode::FSin, PRECISE, op_a);
102 case SubOp::Ex2:
103 return Operation(OperationCode::FExp2, PRECISE, op_a);
104 case SubOp::Lg2:
105 return Operation(OperationCode::FLog2, PRECISE, op_a);
106 case SubOp::Rcp:
107 return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
108 case SubOp::Rsq:
109 return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
110 case SubOp::Sqrt:
111 return Operation(OperationCode::FSqrt, PRECISE, op_a);
112 default:
113 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={:x}", instr.sub_op.Value());
114 return Immediate(0);
115 }
116 }();
117 value = GetSaturatedFloat(value, instr.alu.saturate_d);
118
119 SetRegister(bb, instr.gpr0, value);
120 break;
121 }
122 case OpCode::Id::FMNMX_C:
123 case OpCode::Id::FMNMX_R:
124 case OpCode::Id::FMNMX_IMM: {
125 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
126 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
127
128 const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
129
130 const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
131 const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
132 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
133
134 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
135 SetRegister(bb, instr.gpr0, value);
136 break;
137 }
138 case OpCode::Id::FCMP_RR:
139 case OpCode::Id::FCMP_RC:
140 case OpCode::Id::FCMP_IMMR: {
141 UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
142 Node op_c = GetRegister(instr.gpr39);
143 Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
144 SetRegister(
145 bb, instr.gpr0,
146 Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
147 break;
148 }
149 case OpCode::Id::RRO_C:
150 case OpCode::Id::RRO_R:
151 case OpCode::Id::RRO_IMM: {
152 LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
153
154 // Currently RRO is only implemented as a register move.
155 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
156 SetRegister(bb, instr.gpr0, op_b);
157 break;
158 }
159 default:
160 UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
161 }
162
163 return pc;
164}
165
166} // namespace VideoCommon::Shader
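
The FmulPostFactor table above maps the 3-bit postfactor field to a scale folded into the first operand before the multiply; encodings 1-3 divide and 4-6 multiply. A standalone sketch of the lookup (hypothetical ApplyPostFactor helper, not from the file):

#include <array>

constexpr std::array<float, 7> kPostFactor = {1.0f, 0.5f, 0.25f, 0.125f,
                                              8.0f, 4.0f, 2.0f};

// Scale op_a by the decoded post factor; encoding 0 leaves it untouched.
constexpr float ApplyPostFactor(float op_a, unsigned postfactor) {
    return postfactor != 0 ? op_a * kPostFactor[postfactor] : op_a;
}

static_assert(ApplyPostFactor(2.0f, 3) == 0.25f); // divide by 8
static_assert(ApplyPostFactor(2.0f, 4) == 16.0f); // multiply by 8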
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
deleted file mode 100644
index 88103fede..000000000
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::HalfType;
15using Tegra::Shader::Instruction;
16using Tegra::Shader::OpCode;
17
18u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 bool negate_a = false;
23 bool negate_b = false;
24 bool absolute_a = false;
25 bool absolute_b = false;
26
27 switch (opcode->get().GetId()) {
28 case OpCode::Id::HADD2_R:
29 if (instr.alu_half.ftz == 0) {
30 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
31 }
32 negate_a = ((instr.value >> 43) & 1) != 0;
33 negate_b = ((instr.value >> 31) & 1) != 0;
34 absolute_a = ((instr.value >> 44) & 1) != 0;
35 absolute_b = ((instr.value >> 30) & 1) != 0;
36 break;
37 case OpCode::Id::HADD2_C:
38 if (instr.alu_half.ftz == 0) {
39 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
40 }
41 negate_a = ((instr.value >> 43) & 1) != 0;
42 negate_b = ((instr.value >> 56) & 1) != 0;
43 absolute_a = ((instr.value >> 44) & 1) != 0;
44 absolute_b = ((instr.value >> 54) & 1) != 0;
45 break;
46 case OpCode::Id::HMUL2_R:
47 negate_a = ((instr.value >> 43) & 1) != 0;
48 absolute_a = ((instr.value >> 44) & 1) != 0;
49 absolute_b = ((instr.value >> 30) & 1) != 0;
50 break;
51 case OpCode::Id::HMUL2_C:
52 negate_b = ((instr.value >> 31) & 1) != 0;
53 absolute_a = ((instr.value >> 44) & 1) != 0;
54 absolute_b = ((instr.value >> 54) & 1) != 0;
55 break;
56 default:
57 UNREACHABLE();
58 break;
59 }
60
61 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
62 op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
63
64 auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> {
65 switch (opcode->get().GetId()) {
66 case OpCode::Id::HADD2_C:
67 case OpCode::Id::HMUL2_C:
68 return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
69 case OpCode::Id::HADD2_R:
70 case OpCode::Id::HMUL2_R:
71 return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
72 default:
73 UNREACHABLE();
74 return {HalfType::F32, Immediate(0)};
75 }
76 }();
77 op_b = UnpackHalfFloat(op_b, type_b);
78 op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
79
80 Node value = [this, opcode, op_a, op_b = op_b] {
81 switch (opcode->get().GetId()) {
82 case OpCode::Id::HADD2_C:
83 case OpCode::Id::HADD2_R:
84 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
85 case OpCode::Id::HMUL2_C:
86 case OpCode::Id::HMUL2_R:
87 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
88 default:
89 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
90 return Immediate(0);
91 }
92 }();
93 value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
94 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
95
96 SetRegister(bb, instr.gpr0, value);
97
98 return pc;
99}
100
101} // namespace VideoCommon::Shader
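
The flag decoding above reads the negate/absolute bits straight from the raw instruction word (e.g. (instr.value >> 43) & 1) because their positions differ between the register and const-buffer encodings. A small sketch of the idiom for the HADD2_R positions only (hypothetical names):

#include <cstdint>

struct Hadd2RFlags {
    bool negate_a, negate_b, absolute_a, absolute_b;
};

// Extract single-bit fields from the 64-bit instruction word, as done above.
constexpr Hadd2RFlags DecodeHadd2RFlags(std::uint64_t value) {
    const auto bit = [value](unsigned pos) { return ((value >> pos) & 1) != 0; };
    return {bit(43), bit(31), bit(44), bit(30)};
}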
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
deleted file mode 100644
index d179b9873..000000000
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16
17u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
22 if (instr.alu_half_imm.ftz == 0) {
23 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
24 }
25 } else {
26 if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
27 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
28 }
29 }
30
31 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
32 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
33
34 const Node op_b = UnpackHalfImmediate(instr, true);
35
36 Node value = [&]() {
37 switch (opcode->get().GetId()) {
38 case OpCode::Id::HADD2_IMM:
39 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
40 case OpCode::Id::HMUL2_IMM:
41 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
42 default:
43 UNREACHABLE();
44 return Immediate(0);
45 }
46 }();
47
48 value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
49 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
50 SetRegister(bb, instr.gpr0, value);
51 return pc;
52}
53
54} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
deleted file mode 100644
index f1875967c..000000000
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 switch (opcode->get().GetId()) {
21 case OpCode::Id::MOV32_IMM: {
22 SetRegister(bb, instr.gpr0, GetImmediate32(instr));
23 break;
24 }
25 case OpCode::Id::FMUL32_IMM: {
26 Node value =
27 Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
28 value = GetSaturatedFloat(value, instr.fmul32.saturate);
29
30 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
31 SetRegister(bb, instr.gpr0, value);
32 break;
33 }
34 case OpCode::Id::FADD32I: {
35 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
36 instr.fadd32i.negate_a);
37 const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
38 instr.fadd32i.negate_b);
39
40 const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
41 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
42 SetRegister(bb, instr.gpr0, value);
43 break;
44 }
45 default:
46 UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
47 opcode->get().GetName());
48 }
49
50 return pc;
51}
52
53} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
deleted file mode 100644
index 7b5bb7003..000000000
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ /dev/null
@@ -1,375 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::IAdd3Height;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::Register;
18
19u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr);
22
23 Node op_a = GetRegister(instr.gpr8);
24 Node op_b = [&]() {
25 if (instr.is_b_imm) {
26 return Immediate(instr.alu.GetSignedImm20_20());
27 } else if (instr.is_b_gpr) {
28 return GetRegister(instr.gpr20);
29 } else {
30 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
31 }
32 }();
33
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::IADD_C:
36 case OpCode::Id::IADD_R:
37 case OpCode::Id::IADD_IMM: {
38 UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
39 UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
40
41 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
42 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
43
44 Node value = Operation(OperationCode::UAdd, op_a, op_b);
45
46 if (instr.iadd.x) {
47 Node carry = GetInternalFlag(InternalFlag::Carry);
48 Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
49 value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
50 }
51
52 if (instr.generates_cc) {
53 const Node i0 = Immediate(0);
54
55 Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
56 Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
57 Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
58
59 Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
60 Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
61 Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
62 Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
63
64 SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
65 SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
66 SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
67 SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
68 }
69 SetRegister(bb, instr.gpr0, std::move(value));
70 break;
71 }
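        // A standalone sketch of the CC computation above, for 32-bit operands
        // a and b (illustrative only, not part of the original decoder):
        //   const u32 result = a + b;
        //   const bool zero = result == 0;
        //   const bool sign = static_cast<s32>(result) < 0;
        //   const bool carry = result < a;                      // unsigned wrap
        //   const bool overflow =
        //       static_cast<s32>(a) > 0 && static_cast<s32>(b) > 0 && sign;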
72 case OpCode::Id::IADD3_C:
73 case OpCode::Id::IADD3_R:
74 case OpCode::Id::IADD3_IMM: {
75 Node op_c = GetRegister(instr.gpr39);
76
77 const auto ApplyHeight = [&](IAdd3Height height, Node value) {
78 switch (height) {
79 case IAdd3Height::None:
80 return value;
81 case IAdd3Height::LowerHalfWord:
82 return BitfieldExtract(value, 0, 16);
83 case IAdd3Height::UpperHalfWord:
84 return BitfieldExtract(value, 16, 16);
85 default:
86 UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
87 return Immediate(0);
88 }
89 };
90
91 if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
92 op_a = ApplyHeight(instr.iadd3.height_a, op_a);
93 op_b = ApplyHeight(instr.iadd3.height_b, op_b);
94 op_c = ApplyHeight(instr.iadd3.height_c, op_c);
95 }
96
97 op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
98 op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
99 op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
100
101 const Node value = [&] {
102 Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
103 if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
104 return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
105 }
106 const Node shifted = [&] {
107 switch (instr.iadd3.mode) {
108 case Tegra::Shader::IAdd3Mode::RightShift:
109 // TODO(tech4me): According to
110 // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
111 // the addition between op_a and op_b should be done in uint33; more
112 // investigation is required.
113 return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
114 Immediate(16));
115 case Tegra::Shader::IAdd3Mode::LeftShift:
116 return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
117 Immediate(16));
118 default:
119 return add_ab;
120 }
121 }();
122 return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
123 }();
124
125 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
126 SetRegister(bb, instr.gpr0, value);
127 break;
128 }
129 case OpCode::Id::ISCADD_C:
130 case OpCode::Id::ISCADD_R:
131 case OpCode::Id::ISCADD_IMM: {
132 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
133 "Condition codes generation in ISCADD is not implemented");
134
135 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
136 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
137
138 const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
139 const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
140 const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
141
142 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
143 SetRegister(bb, instr.gpr0, value);
144 break;
145 }
146 case OpCode::Id::POPC_C:
147 case OpCode::Id::POPC_R:
148 case OpCode::Id::POPC_IMM: {
149 if (instr.popc.invert) {
150 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
151 }
152 const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
153 SetRegister(bb, instr.gpr0, value);
154 break;
155 }
156 case OpCode::Id::FLO_R:
157 case OpCode::Id::FLO_C:
158 case OpCode::Id::FLO_IMM: {
159 Node value;
160 if (instr.flo.invert) {
161 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
162 }
163 if (instr.flo.is_signed) {
164 value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
165 } else {
166 value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
167 }
168 if (instr.flo.sh) {
169 value =
170 Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
171 }
172 SetRegister(bb, instr.gpr0, std::move(value));
173 break;
174 }
175 case OpCode::Id::SEL_C:
176 case OpCode::Id::SEL_R:
177 case OpCode::Id::SEL_IMM: {
178 const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
179 const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
180 SetRegister(bb, instr.gpr0, value);
181 break;
182 }
183 case OpCode::Id::ICMP_CR:
184 case OpCode::Id::ICMP_R:
185 case OpCode::Id::ICMP_RC:
186 case OpCode::Id::ICMP_IMM: {
187 const Node zero = Immediate(0);
188
189 const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
190 switch (opcode->get().GetId()) {
191 case OpCode::Id::ICMP_CR:
192 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
193 GetRegister(instr.gpr39)};
194 case OpCode::Id::ICMP_R:
195 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
196 case OpCode::Id::ICMP_RC:
197 return {GetRegister(instr.gpr39),
198 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
199 case OpCode::Id::ICMP_IMM:
200 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
201 default:
202 UNREACHABLE();
203 return {zero, zero};
204 }
205 }();
206 const Node op_lhs = GetRegister(instr.gpr8);
207 const Node comparison =
208 GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
209 SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
210 break;
211 }
212 case OpCode::Id::LOP_C:
213 case OpCode::Id::LOP_R:
214 case OpCode::Id::LOP_IMM: {
215 if (instr.alu.lop.invert_a)
216 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
217 if (instr.alu.lop.invert_b)
218 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
219
220 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
221 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
222 instr.generates_cc);
223 break;
224 }
225 case OpCode::Id::LOP3_C:
226 case OpCode::Id::LOP3_R:
227 case OpCode::Id::LOP3_IMM: {
228 const Node op_c = GetRegister(instr.gpr39);
229 const Node lut = [&]() {
230 if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
231 return Immediate(instr.alu.lop3.GetImmLut28());
232 } else {
233 return Immediate(instr.alu.lop3.GetImmLut48());
234 }
235 }();
236
237 WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
238 break;
239 }
240 case OpCode::Id::IMNMX_C:
241 case OpCode::Id::IMNMX_R:
242 case OpCode::Id::IMNMX_IMM: {
243 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
244
245 const bool is_signed = instr.imnmx.is_signed;
246
247 const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
248 const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
249 const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
250 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
251
252 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
253 SetRegister(bb, instr.gpr0, value);
254 break;
255 }
256 case OpCode::Id::LEA_R2:
257 case OpCode::Id::LEA_R1:
258 case OpCode::Id::LEA_IMM:
259 case OpCode::Id::LEA_RZ:
260 case OpCode::Id::LEA_HI: {
261 auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
262 switch (opcode->get().GetId()) {
263 case OpCode::Id::LEA_R2: {
264 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
265 Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
266 }
267 case OpCode::Id::LEA_R1: {
268 const bool neg = instr.lea.r1.neg != 0;
269 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
270 GetRegister(instr.gpr20),
271 Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
272 }
273 case OpCode::Id::LEA_IMM: {
274 const bool neg = instr.lea.imm.neg != 0;
275 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
276 Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
277 Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
278 }
279 case OpCode::Id::LEA_RZ: {
280 const bool neg = instr.lea.rz.neg != 0;
281 return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
282 GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
283 Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
284 }
285 case OpCode::Id::LEA_HI:
286 default:
287 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
288
289 return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
290 Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
291 }
292 }();
293
294 UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
295 "Unhandled LEA Predicate");
296
297 Node value =
298 Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
299 value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
300 SetRegister(bb, instr.gpr0, std::move(value));
301
302 break;
303 }
304 default:
305 UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
306 }
307
308 return pc;
309}
310
311void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
312 Node imm_lut, bool sets_cc) {
313 const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
314 Node value = Immediate(0);
315 const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
316 if (imm.GetValue() & 0x01) {
317 const Node a = Operation(OperationCode::IBitwiseNot, na);
318 const Node b = Operation(OperationCode::IBitwiseNot, nb);
319 const Node c = Operation(OperationCode::IBitwiseNot, nc);
320 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
321 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
322 value = Operation(OperationCode::IBitwiseOr, value, r);
323 }
324 if (imm.GetValue() & 0x02) {
325 const Node a = Operation(OperationCode::IBitwiseNot, na);
326 const Node b = Operation(OperationCode::IBitwiseNot, nb);
327 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
328 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
329 value = Operation(OperationCode::IBitwiseOr, value, r);
330 }
331 if (imm.GetValue() & 0x04) {
332 const Node a = Operation(OperationCode::IBitwiseNot, na);
333 const Node c = Operation(OperationCode::IBitwiseNot, nc);
334 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
335 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
336 value = Operation(OperationCode::IBitwiseOr, value, r);
337 }
338 if (imm.GetValue() & 0x08) {
339 const Node a = Operation(OperationCode::IBitwiseNot, na);
340 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
341 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
342 value = Operation(OperationCode::IBitwiseOr, value, r);
343 }
344 if (imm.GetValue() & 0x10) {
345 const Node b = Operation(OperationCode::IBitwiseNot, nb);
346 const Node c = Operation(OperationCode::IBitwiseNot, nc);
347 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
348 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
349 value = Operation(OperationCode::IBitwiseOr, value, r);
350 }
351 if (imm.GetValue() & 0x20) {
352 const Node b = Operation(OperationCode::IBitwiseNot, nb);
353 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
354 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
355 value = Operation(OperationCode::IBitwiseOr, value, r);
356 }
357 if (imm.GetValue() & 0x40) {
358 const Node c = Operation(OperationCode::IBitwiseNot, nc);
359 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
360 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
361 value = Operation(OperationCode::IBitwiseOr, value, r);
362 }
363 if (imm.GetValue() & 0x80) {
364 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
365 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
366 value = Operation(OperationCode::IBitwiseOr, value, r);
367 }
368 return value;
369 }(op_a, op_b, op_c, imm_lut);
370
371 SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
372 SetRegister(bb, dest, lop3_fast);
373}
374
375} // namespace VideoCommon::Shader
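
WriteLop3Instruction above expands the LOP3 truth table one minterm at a time into IR nodes. On plain integers the same lookup is compact; a reference sketch (hypothetical Lop3 helper, mirroring the 0x01..0x80 cases above):

#include <cstdint>

// Minterm k of 'lut' is OR'd into the result exactly when the (a, b, c) bits
// match k, which is what the IBitwiseAnd/IBitwiseOr chains above build.
constexpr std::uint32_t Lop3(std::uint32_t a, std::uint32_t b, std::uint32_t c,
                             std::uint8_t lut) {
    std::uint32_t result = 0;
    for (int k = 0; k < 8; ++k) {
        if ((lut >> k) & 1) {
            result |= ((k & 4) ? a : ~a) & ((k & 2) ? b : ~b) & ((k & 1) ? c : ~c);
        }
    }
    return result;
}

// lut 0xE8 selects minterms {011, 101, 110, 111}: majority(a, b, c).
static_assert(Lop3(0xF0F0F0F0u, 0xCCCCCCCCu, 0xAAAAAAAAu, 0xE8) ==
              ((0xF0F0F0F0u & 0xCCCCCCCCu) | (0xF0F0F0F0u & 0xAAAAAAAAu) |
               (0xCCCCCCCCu & 0xAAAAAAAAu)));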
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
deleted file mode 100644
index 73580277a..000000000
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::LogicOperation;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::PredicateResultMode;
18using Tegra::Shader::Register;
19
20u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
21 const Instruction instr = {program_code[pc]};
22 const auto opcode = OpCode::Decode(instr);
23
24 Node op_a = GetRegister(instr.gpr8);
25 Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
26
27 switch (opcode->get().GetId()) {
28 case OpCode::Id::IADD32I: {
29 UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
30
31 op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
32
33 Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
34
35 SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
36 SetRegister(bb, instr.gpr0, std::move(value));
37 break;
38 }
39 case OpCode::Id::LOP32I: {
40 if (instr.alu.lop32i.invert_a) {
41 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
42 }
43
44 if (instr.alu.lop32i.invert_b) {
45 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
46 }
47
48 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
49 std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
50 instr.op_32.generates_cc != 0);
51 break;
52 }
53 default:
54 UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
55 opcode->get().GetName());
56 }
57
58 return pc;
59}
60
61void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
62 Node op_b, PredicateResultMode predicate_mode, Pred predicate,
63 bool sets_cc) {
64 Node result = [&] {
65 switch (logic_op) {
66 case LogicOperation::And:
67 return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
68 case LogicOperation::Or:
69 return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
70 case LogicOperation::Xor:
71 return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
72 case LogicOperation::PassB:
73 return op_b;
74 default:
75 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
76 return Immediate(0);
77 }
78 }();
79
80 SetInternalFlagsFromInteger(bb, result, sets_cc);
81 SetRegister(bb, dest, result);
82
83 // Write the predicate value depending on the predicate mode.
84 switch (predicate_mode) {
85 case PredicateResultMode::None:
86 // Do nothing.
87 return;
88 case PredicateResultMode::NotZero: {
89 // Set the predicate to true if the result is not zero.
90 Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
91 SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
92 break;
93 }
94 default:
95 UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
96 }
97}
98
99} // namespace VideoCommon::Shader
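
WriteLogicOperation above computes the logic result, then optionally mirrors it into a predicate when the result mode is NotZero. A condensed scalar model (hypothetical names, not the IR version):

#include <cstdint>
#include <optional>

enum class LogicOp { And, Or, Xor, PassB };

struct LogicResult {
    std::uint32_t value;
    std::optional<bool> predicate; // set only in NotZero mode
};

LogicResult DoLogicOp(LogicOp op, std::uint32_t a, std::uint32_t b, bool not_zero_mode) {
    std::uint32_t value = 0;
    switch (op) {
    case LogicOp::And:
        value = a & b;
        break;
    case LogicOp::Or:
        value = a | b;
        break;
    case LogicOp::Xor:
        value = a ^ b;
        break;
    case LogicOp::PassB:
        value = b;
        break;
    }
    if (not_zero_mode) {
        return {value, value != 0}; // predicate is true iff the result is non-zero
    }
    return {value, std::nullopt};
}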
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
deleted file mode 100644
index 8e3b46e8e..000000000
--- a/src/video_core/shader/decode/bfe.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 Node op_a = GetRegister(instr.gpr8);
21 Node op_b = [&] {
22 switch (opcode->get().GetId()) {
23 case OpCode::Id::BFE_R:
24 return GetRegister(instr.gpr20);
25 case OpCode::Id::BFE_C:
26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 case OpCode::Id::BFE_IMM:
28 return Immediate(instr.alu.GetSignedImm20_20());
29 default:
30 UNREACHABLE();
31 return Immediate(0);
32 }
33 }();
34
35 UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE are not implemented");
36
37 const bool is_signed = instr.bfe.is_signed;
38
39 // Uses the reverse parallel method from
40 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
41 // Note for later: implement a faster method if possible.
42 if (instr.bfe.brev) {
43 const auto swap = [&](u32 s, u32 mask) {
44 Node v1 =
45 SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
46 if (mask != 0) {
47 v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
48 Immediate(mask));
49 }
50 Node v2 = op_a;
51 if (mask != 0) {
52 v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
53 Immediate(mask));
54 }
55 v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
56 Immediate(s));
57 return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
58 std::move(v2));
59 };
60 op_a = swap(1, 0x55555555U);
61 op_a = swap(2, 0x33333333U);
62 op_a = swap(4, 0x0F0F0F0FU);
63 op_a = swap(8, 0x00FF00FFU);
64 op_a = swap(16, 0);
65 }
66
67 const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
68 Immediate(0), Immediate(8));
69 const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
70 Immediate(8), Immediate(8));
71 auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
72 SetRegister(bb, instr.gpr0, std::move(result));
73
74 return pc;
75}
76
77} // namespace VideoCommon::Shader
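
The BRev path above is the Stanford bit-hacks reverse-parallel method built out of shader IR nodes. On a plain 32-bit word the same ladder reads (reference sketch):

#include <cstdint>

// Reverse all 32 bits by swapping ever-larger groups: single bits, pairs,
// nibbles, bytes, and finally the two half-words (the swap(16, 0) step above).
constexpr std::uint32_t ReverseBits(std::uint32_t v) {
    v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
    v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
    v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);
    v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);
    return (v >> 16) | (v << 16);
}

static_assert(ReverseBits(0x00000001u) == 0x80000000u);
static_assert(ReverseBits(0x12345678u) == 0x1E6A2C48u);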
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
deleted file mode 100644
index 70d1c055b..000000000
--- a/src/video_core/shader/decode/bfi.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> {
21 switch (opcode->get().GetId()) {
22 case OpCode::Id::BFI_RC:
23 return {GetRegister(instr.gpr39),
24 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
25 case OpCode::Id::BFI_IMM_R:
26 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
27 default:
28 UNREACHABLE();
29 return {Immediate(0), Immediate(0)};
30 }
31 }();
32 const Node insert = GetRegister(instr.gpr8);
33 const Node offset = BitfieldExtract(packed_shift, 0, 8);
34 const Node bits = BitfieldExtract(packed_shift, 8, 8);
35
36 const Node value =
37 Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
38
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 SetRegister(bb, instr.gpr0, value);
41
42 return pc;
43}
44
45} // namespace VideoCommon::Shader
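
BFI decodes its offset and width from bits 0-7 and 8-15 of the packed operand and then performs a bitfield insert. A scalar model of the UBitfieldInsert operation used above (hypothetical helper):

#include <cstdint>

// Insert the low 'bits' bits of 'insert' into 'base' starting at 'offset'.
constexpr std::uint32_t BitfieldInsert(std::uint32_t base, std::uint32_t insert,
                                       std::uint32_t offset, std::uint32_t bits) {
    const std::uint32_t mask = (bits < 32 ? (1u << bits) : 0u) - 1u;
    return (base & ~(mask << offset)) | ((insert & mask) << offset);
}

static_assert(BitfieldInsert(0xFFFF0000u, 0xABu, 8, 8) == 0xFFFFAB00u);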
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
deleted file mode 100644
index fea7a54df..000000000
--- a/src/video_core/shader/decode/conversion.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6#include <optional>
7#include <utility>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/node_helper.h"
13#include "video_core/shader/shader_ir.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register;
20
21namespace {
22
23constexpr OperationCode GetFloatSelector(u64 selector) {
24 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
25}
26
27constexpr u32 SizeInBits(Register::Size size) {
28 switch (size) {
29 case Register::Size::Byte:
30 return 8;
31 case Register::Size::Short:
32 return 16;
33 case Register::Size::Word:
34 return 32;
35 case Register::Size::Long:
36 return 64;
37 }
38 return 0;
39}
40
41constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
42 Register::Size dst_size,
43 bool src_signed,
44 bool dst_signed) {
45 const u32 dst_bits = SizeInBits(dst_size);
46 if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
47 if (src_signed == dst_signed) {
48 return std::nullopt;
49 }
50 return std::make_pair(0, std::numeric_limits<s32>::max());
51 }
52 if (dst_signed) {
53 // Signed destination: clamp to [-2^(bits-1), 2^(bits-1) - 1], e.g. [-128, 127] for a byte
54 return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
55 } else {
56 // Unsigned destination
57 if (dst_bits == 32) {
58 // Avoid shifting by 32; that is undefined behavior
59 return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
60 }
61 return std::make_pair(0, (1 << dst_bits) - 1);
62 }
63}
64
65} // Anonymous namespace
66
67u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
68 const Instruction instr = {program_code[pc]};
69 const auto opcode = OpCode::Decode(instr);
70
71 switch (opcode->get().GetId()) {
72 case OpCode::Id::I2I_R:
73 case OpCode::Id::I2I_C:
74 case OpCode::Id::I2I_IMM: {
75 const bool src_signed = instr.conversion.is_input_signed;
76 const bool dst_signed = instr.conversion.is_output_signed;
77 const Register::Size src_size = instr.conversion.src_size;
78 const Register::Size dst_size = instr.conversion.dst_size;
79 const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
80
81 Node value = [this, instr, opcode] {
82 switch (opcode->get().GetId()) {
83 case OpCode::Id::I2I_R:
84 return GetRegister(instr.gpr20);
85 case OpCode::Id::I2I_C:
86 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
87 case OpCode::Id::I2I_IMM:
88 return Immediate(instr.alu.GetSignedImm20_20());
89 default:
90 UNREACHABLE();
91 return Immediate(0);
92 }
93 }();
94
95 // Ensure the source selector is valid
96 switch (instr.conversion.src_size) {
97 case Register::Size::Byte:
98 break;
99 case Register::Size::Short:
100 ASSERT(selector == 0 || selector == 2);
101 break;
102 default:
103 ASSERT(selector == 0);
104 break;
105 }
106
107 if (src_size != Register::Size::Word || selector != 0) {
108 value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
109 Immediate(selector * 8), Immediate(SizeInBits(src_size)));
110 }
111
112 value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
113 instr.conversion.negate_a, src_signed);
114
115 if (instr.alu.saturate_d) {
116 if (src_signed && !dst_signed) {
117 Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
118 Immediate(1 << (SizeInBits(src_size) - 1)));
119 value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
120 std::move(value));
121
122 // Simplify generated expressions; this can be removed without semantic impact
123 SetTemporary(bb, 0, std::move(value));
124 value = GetTemporary(0);
125
126 if (dst_size != Register::Size::Word) {
127 const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
128 Node is_large =
129 Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
130 value = Operation(OperationCode::Select, std::move(is_large), limit,
131 std::move(value));
132 }
133 } else if (const std::optional bounds =
134 IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
135 value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
136 Immediate(bounds->first));
137 value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
138 Immediate(bounds->second));
139 }
140 } else if (dst_size != Register::Size::Word) {
141 // No saturation, we only have to mask the result
142 Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
143 value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
144 }
145
146 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
147 SetRegister(bb, instr.gpr0, std::move(value));
148 break;
149 }
150 case OpCode::Id::I2F_R:
151 case OpCode::Id::I2F_C:
152 case OpCode::Id::I2F_IMM: {
153 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
154 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
155 "Condition codes generation in I2F is not implemented");
156
157 Node value = [&] {
158 switch (opcode->get().GetId()) {
159 case OpCode::Id::I2F_R:
160 return GetRegister(instr.gpr20);
161 case OpCode::Id::I2F_C:
162 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
163 case OpCode::Id::I2F_IMM:
164 return Immediate(instr.alu.GetSignedImm20_20());
165 default:
166 UNREACHABLE();
167 return Immediate(0);
168 }
169 }();
170
171 const bool input_signed = instr.conversion.is_input_signed;
172
173 if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
174 ASSERT(instr.conversion.src_size == Register::Size::Byte ||
175 instr.conversion.src_size == Register::Size::Short);
176 if (instr.conversion.src_size == Register::Size::Short) {
177 ASSERT(offset == 0 || offset == 2);
178 }
179 value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
180 std::move(value), Immediate(offset * 8));
181 }
182
183 value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
184 value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
185 value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
186 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
187
188 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
189
190 if (instr.conversion.dst_size == Register::Size::Short) {
191 value = Operation(OperationCode::HCastFloat, PRECISE, value);
192 }
193
194 SetRegister(bb, instr.gpr0, value);
195 break;
196 }
197 case OpCode::Id::F2F_R:
198 case OpCode::Id::F2F_C:
199 case OpCode::Id::F2F_IMM: {
200 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
201 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
202 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
203 "Condition codes generation in F2F is not implemented");
204
205 Node value = [&]() {
206 switch (opcode->get().GetId()) {
207 case OpCode::Id::F2F_R:
208 return GetRegister(instr.gpr20);
209 case OpCode::Id::F2F_C:
210 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
211 case OpCode::Id::F2F_IMM:
212 return GetImmediate19(instr);
213 default:
214 UNREACHABLE();
215 return Immediate(0);
216 }
217 }();
218
219 if (instr.conversion.src_size == Register::Size::Short) {
220 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
221 std::move(value));
222 } else {
223 ASSERT(instr.conversion.float_src.selector == 0);
224 }
225
226 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
227
228 value = [&] {
229 if (instr.conversion.src_size != instr.conversion.dst_size) {
230 // Rounding operations only matter when the source and destination conversion
231 // sizes are the same.
232 return value;
233 }
234 switch (instr.conversion.f2f.GetRoundingMode()) {
235 case Tegra::Shader::F2fRoundingOp::None:
236 return value;
237 case Tegra::Shader::F2fRoundingOp::Round:
238 return Operation(OperationCode::FRoundEven, value);
239 case Tegra::Shader::F2fRoundingOp::Floor:
240 return Operation(OperationCode::FFloor, value);
241 case Tegra::Shader::F2fRoundingOp::Ceil:
242 return Operation(OperationCode::FCeil, value);
243 case Tegra::Shader::F2fRoundingOp::Trunc:
244 return Operation(OperationCode::FTrunc, value);
245 default:
246 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
247 instr.conversion.f2f.rounding.Value());
248 return value;
249 }
250 }();
251 value = GetSaturatedFloat(value, instr.alu.saturate_d);
252
253 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
254
255 if (instr.conversion.dst_size == Register::Size::Short) {
256 value = Operation(OperationCode::HCastFloat, PRECISE, value);
257 }
258
259 SetRegister(bb, instr.gpr0, value);
260 break;
261 }
262 case OpCode::Id::F2I_R:
263 case OpCode::Id::F2I_C:
264 case OpCode::Id::F2I_IMM: {
265 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
266 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
267 "Condition codes generation in F2I is not implemented");
268 Node value = [&]() {
269 switch (opcode->get().GetId()) {
270 case OpCode::Id::F2I_R:
271 return GetRegister(instr.gpr20);
272 case OpCode::Id::F2I_C:
273 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
274 case OpCode::Id::F2I_IMM:
275 return GetImmediate19(instr);
276 default:
277 UNREACHABLE();
278 return Immediate(0);
279 }
280 }();
281
282 if (instr.conversion.src_size == Register::Size::Short) {
283 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
284 std::move(value));
285 } else {
286 ASSERT(instr.conversion.float_src.selector == 0);
287 }
288
289 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
290
291 value = [&]() {
292 switch (instr.conversion.f2i.rounding) {
293 case Tegra::Shader::F2iRoundingOp::RoundEven:
294 return Operation(OperationCode::FRoundEven, PRECISE, value);
295 case Tegra::Shader::F2iRoundingOp::Floor:
296 return Operation(OperationCode::FFloor, PRECISE, value);
297 case Tegra::Shader::F2iRoundingOp::Ceil:
298 return Operation(OperationCode::FCeil, PRECISE, value);
299 case Tegra::Shader::F2iRoundingOp::Trunc:
300 return Operation(OperationCode::FTrunc, PRECISE, value);
301 default:
302 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
303 instr.conversion.f2i.rounding.Value());
304 return Immediate(0);
305 }
306 }();
307 const bool is_signed = instr.conversion.is_output_signed;
308 value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
309 value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
310
311 SetRegister(bb, instr.gpr0, value);
312 break;
313 }
314 default:
315 UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
316 }
317
318 return pc;
319}
320
321} // namespace VideoCommon::Shader
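
IntegerSaturateBounds above derives the clamp limits from the destination size and signedness, and the decoder then emits an IMax/IMin pair. A few worked values under the same rules (illustrative sketch, widened to 64 bits to sidestep overflow):

#include <cstdint>

constexpr std::int64_t SaturateTo(std::int64_t v, std::int64_t lo, std::int64_t hi) {
    return v < lo ? lo : (v > hi ? hi : v);
}

static_assert(SaturateTo(300, -128, 127) == 127);          // signed byte destination
static_assert(SaturateTo(-7, 0, 255) == 0);                // unsigned byte destination
static_assert(SaturateTo(70000, -32768, 32767) == 32767);  // signed short destination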
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
deleted file mode 100644
index 5973588d6..000000000
--- a/src/video_core/shader/decode/ffma.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
21 if (instr.ffma.tab5980_0 != 1) {
22 LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
23 }
24 if (instr.ffma.tab5980_1 != 0) {
25 LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
26 }
27
28 const Node op_a = GetRegister(instr.gpr8);
29
30 auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
31 switch (opcode->get().GetId()) {
32 case OpCode::Id::FFMA_CR: {
33 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
34 GetRegister(instr.gpr39)};
35 }
36 case OpCode::Id::FFMA_RR:
37 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
38 case OpCode::Id::FFMA_RC: {
39 return {GetRegister(instr.gpr39),
40 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
41 }
42 case OpCode::Id::FFMA_IMM:
43 return {GetImmediate19(instr), GetRegister(instr.gpr39)};
44 default:
45 UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
46 return {Immediate(0), Immediate(0)};
47 }
48 }();
49
50 op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
51 op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
52
53 Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
54 value = GetSaturatedFloat(value, instr.alu.saturate_d);
55
56 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
57 SetRegister(bb, instr.gpr0, value);
58
59 return pc;
60}
61
62} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
deleted file mode 100644
index 5614e8a0d..000000000
--- a/src/video_core/shader/decode/float_set.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18
19 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
20 instr.fset.neg_a != 0);
21
22 Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
33
34 // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
35 // condition is true, and to 0 otherwise.
36 const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
37
38 const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
39 const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
40
41 const Node predicate = Operation(combiner, first_pred, second_pred);
42
43 const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
44 const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
45 const Node value =
46 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
47
48 if (instr.fset.bf) {
49 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
50 } else {
51 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
52 }
53 SetRegister(bb, instr.gpr0, value);
54
55 return pc;
56}
57
58} // namespace VideoCommon::Shader
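
FSET produces a float boolean (1.0f/0.0f) when the bf bit is set and an integer mask (-1/0) otherwise. A scalar model of the select above, expressed as raw bit patterns (hypothetical helper):

#include <cstdint>

inline std::uint32_t FsetResult(bool condition, bool bf) {
    if (bf) {
        return condition ? 0x3F800000u : 0u; // IEEE-754 bit pattern of 1.0f, or 0.0f
    }
    return condition ? 0xFFFFFFFFu : 0u; // integer -1, or 0
}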
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
deleted file mode 100644
index 200c2c983..000000000
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19
20 Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
21 instr.fsetp.neg_a != 0);
22 Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31 op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
32
33 // We can't use the constant predicate as destination.
34 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
35
36 const Node predicate =
37 GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
38 const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
39
40 const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
41 const Node value = Operation(combiner, predicate, second_pred);
42
43 // Set the primary predicate to the result of Predicate OP SecondPredicate
44 SetPredicate(bb, instr.fsetp.pred3, value);
45
46 if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
47 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
48 // if enabled
49 const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
50 const Node second_value = Operation(combiner, negated_pred, second_pred);
51 SetPredicate(bb, instr.fsetp.pred0, second_value);
52 }
53
54 return pc;
55}
56
57} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
deleted file mode 100644
index fa83108cd..000000000
--- a/src/video_core/shader/decode/half_set.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "common/logging/log.h"
10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/shader/node_helper.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using std::move;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::PredCondition;
20
21u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
22 const Instruction instr = {program_code[pc]};
23 const auto opcode = OpCode::Decode(instr);
24
25 PredCondition cond{};
26 bool bf = false;
27 bool ftz = false;
28 bool neg_a = false;
29 bool abs_a = false;
30 bool neg_b = false;
31 bool abs_b = false;
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSET2_C:
34 case OpCode::Id::HSET2_IMM:
35 cond = instr.hsetp2.cbuf_and_imm.cond;
36 bf = instr.Bit(53);
37 ftz = instr.Bit(54);
38 neg_a = instr.Bit(43);
39 abs_a = instr.Bit(44);
40 neg_b = instr.Bit(56);
41 abs_b = instr.Bit(54);
42 break;
43 case OpCode::Id::HSET2_R:
44 cond = instr.hsetp2.reg.cond;
45 bf = instr.Bit(49);
46 ftz = instr.Bit(50);
47 neg_a = instr.Bit(43);
48 abs_a = instr.Bit(44);
49 neg_b = instr.Bit(31);
50 abs_b = instr.Bit(30);
51 break;
52 default:
53 UNREACHABLE();
54 }
55
56 Node op_b = [this, instr, opcode] {
57 switch (opcode->get().GetId()) {
58 case OpCode::Id::HSET2_C:
59 // Report as unimplemented, since this path is untested.
60 UNIMPLEMENTED_MSG("HSET2_C is not implemented");
61 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
62 case OpCode::Id::HSET2_R:
63 return GetRegister(instr.gpr20);
64 case OpCode::Id::HSET2_IMM:
65 return UnpackHalfImmediate(instr, true);
66 default:
67 UNREACHABLE();
68 return Node{};
69 }
70 }();
71
72 if (!ftz) {
73 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
74 }
75
76 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
77 op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
78
79 switch (opcode->get().GetId()) {
80 case OpCode::Id::HSET2_R:
81 op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
82 [[fallthrough]];
83 case OpCode::Id::HSET2_C:
84 op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
85 break;
86 default:
87 break;
88 }
89
90 Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
91
92 Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
93
94 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
95
96 // HSET2 operates on each half float in the pack.
97 std::array<Node, 2> values;
98 for (u32 i = 0; i < 2; ++i) {
99 const u32 raw_value = bf ? 0x3c00 : 0xffff;
100 Node true_value = Immediate(raw_value << (i * 16));
101 Node false_value = Immediate(0);
102
103 Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
104 Node predicate = Operation(combiner, comparison, second_pred);
105 values[i] =
106 Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
107 }
108
109 Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
110 SetRegister(bb, instr.gpr0, move(value));
111
112 return pc;
113}
114
115} // namespace VideoCommon::Shader
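Each half of the HSET2 pack above yields either a true value (an all-ones 16-bit mask, or 0x3c00 — half-precision 1.0 — when BF is set) or zero, shifted into its lane and OR-ed into the destination. A standalone sketch of just that packing arithmetic (hypothetical host code, not the IR):

#include <cstdint>
#include <cstdio>

// Pack two per-lane boolean results the way HSET2 does.
static uint32_t PackHset2(bool lane0, bool lane1, bool bf) {
    const uint32_t raw = bf ? 0x3c00 : 0xffff;
    uint32_t result = 0;
    if (lane0) result |= raw;        // lane 0 lives in bits 0..15
    if (lane1) result |= raw << 16;  // lane 1 lives in bits 16..31
    return result;
}

int main() {
    std::printf("%08x\n", PackHset2(true, false, false)); // 0000ffff
    std::printf("%08x\n", PackHset2(true, true, true));   // 3c003c00
}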
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
deleted file mode 100644
index 310655619..000000000
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17
18u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 if (instr.hsetp2.ftz != 0) {
23 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
24 }
25
26 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
27 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
28
29 Tegra::Shader::PredCondition cond{};
30 bool h_and{};
31 Node op_b{};
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSETP2_C:
34 cond = instr.hsetp2.cbuf_and_imm.cond;
35 h_and = instr.hsetp2.cbuf_and_imm.h_and;
36 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
37 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
38 // F32 is hardcoded in hardware
39 op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
40 break;
41 case OpCode::Id::HSETP2_IMM:
42 cond = instr.hsetp2.cbuf_and_imm.cond;
43 h_and = instr.hsetp2.cbuf_and_imm.h_and;
44 op_b = UnpackHalfImmediate(instr, true);
45 break;
46 case OpCode::Id::HSETP2_R:
47 cond = instr.hsetp2.reg.cond;
48 h_and = instr.hsetp2.reg.h_and;
49 op_b =
50 GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
51 instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
52 break;
53 default:
54 UNREACHABLE();
55 op_b = Immediate(0);
56 }
57
58 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
59 const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
60
61 const auto Write = [&](u64 dest, Node src) {
62 SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
63 };
64
65 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
66 const u64 first = instr.hsetp2.pred3;
67 const u64 second = instr.hsetp2.pred0;
68 if (h_and) {
69 Node joined = Operation(OperationCode::LogicalAnd2, comparison);
70 Write(first, joined);
71 Write(second, Operation(OperationCode::LogicalNegate, std::move(joined)));
72 } else {
73 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
74 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
75 }
76
77 return pc;
78}
79
80} // namespace VideoCommon::Shader
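HSETP2 turns one two-lane comparison into two predicate writes: with H_AND set, both lanes are AND-ed together for the first predicate and the negation goes to the second; otherwise each lane feeds its own predicate. A bool-level sketch of that split (the final combiner with pred39 is omitted for brevity; names are illustrative):

#include <cstdio>

int main() {
    const bool lane0 = true, lane1 = false; // per-lane comparison results
    const bool h_and = true;

    bool first, second;
    if (h_and) {
        const bool joined = lane0 && lane1; // LogicalAnd2 over the pair
        first = joined;
        second = !joined;
    } else {
        first = lane0;  // LogicalPick2, element 0
        second = lane1; // LogicalPick2, element 1
    }
    std::printf("first=%d second=%d\n", first, second); // first=0 second=1
}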
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
deleted file mode 100644
index 5b44cb79c..000000000
--- a/src/video_core/shader/decode/hfma2.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/node_helper.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace VideoCommon::Shader {
14
15using Tegra::Shader::HalfPrecision;
16using Tegra::Shader::HalfType;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19
20u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
21 const Instruction instr = {program_code[pc]};
22 const auto opcode = OpCode::Decode(instr);
23
24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
25 DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
26 } else {
27 DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
28 }
29
30 constexpr auto identity = HalfType::H0_H1;
31 bool neg_b{}, neg_c{};
32 auto [saturate, type_b, op_b, type_c,
33 op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::HFMA2_CR:
36 neg_b = instr.hfma2.negate_b;
37 neg_c = instr.hfma2.negate_c;
38 return {instr.hfma2.saturate, HalfType::F32,
39 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
40 instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
41 case OpCode::Id::HFMA2_RC:
42 neg_b = instr.hfma2.negate_b;
43 neg_c = instr.hfma2.negate_c;
44 return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
45 HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
46 case OpCode::Id::HFMA2_RR:
47 neg_b = instr.hfma2.rr.negate_b;
48 neg_c = instr.hfma2.rr.negate_c;
49 return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
50 instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
51 case OpCode::Id::HFMA2_IMM_R:
52 neg_c = instr.hfma2.negate_c;
53 return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
54 instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
55 default:
56 return {false, identity, Immediate(0), identity, Immediate(0)};
57 }
58 }();
59
60 const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
61 op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
62 op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
63
64 Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
65 value = GetSaturatedHalfFloat(value, saturate);
66 value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
67
68 SetRegister(bb, instr.gpr0, value);
69
70 return pc;
71}
72
73} // namespace VideoCommon::Shader
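HFMA2 above is two independent fused multiply-adds over the packed half pairs, with optional saturation applied before merging into the destination. A host-side sketch over plain floats, assuming saturation clamps to [0, 1] (which is what GetSaturatedHalfFloat is expected to do here):

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>

int main() {
    const std::array<float, 2> a{1.5f, -0.25f}, b{2.0f, 4.0f}, c{0.25f, 0.5f};
    const bool saturate = true;

    std::array<float, 2> r{};
    for (int i = 0; i < 2; ++i) {
        r[i] = std::fma(a[i], b[i], c[i]);       // HFma on each lane
        if (saturate) {
            r[i] = std::clamp(r[i], 0.0f, 1.0f); // saturation to [0, 1]
        }
    }
    std::printf("%f %f\n", r[0], r[1]); // 1.000000 0.000000
}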
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
deleted file mode 100644
index 5470e8cf4..000000000
--- a/src/video_core/shader/decode/image.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
#include <array>
#include <bitset>
#include <optional>
#include <utility>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/textures/texture.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Instruction;
21using Tegra::Shader::OpCode;
22using Tegra::Shader::PredCondition;
23using Tegra::Shader::StoreType;
24using Tegra::Texture::ComponentType;
25using Tegra::Texture::TextureFormat;
26using Tegra::Texture::TICEntry;
27
28namespace {
29
30ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
31 std::size_t component) {
32 const TextureFormat format{descriptor.format};
33 switch (format) {
34 case TextureFormat::R16G16B16A16:
35 case TextureFormat::R32G32B32A32:
36 case TextureFormat::R32G32B32:
37 case TextureFormat::R32G32:
38 case TextureFormat::R16G16:
39 case TextureFormat::R32:
40 case TextureFormat::R16:
41 case TextureFormat::R8:
42 case TextureFormat::R1:
43 if (component == 0) {
44 return descriptor.r_type;
45 }
46 if (component == 1) {
47 return descriptor.g_type;
48 }
49 if (component == 2) {
50 return descriptor.b_type;
51 }
52 if (component == 3) {
53 return descriptor.a_type;
54 }
55 break;
56 case TextureFormat::A8R8G8B8:
57 if (component == 0) {
58 return descriptor.a_type;
59 }
60 if (component == 1) {
61 return descriptor.r_type;
62 }
63 if (component == 2) {
64 return descriptor.g_type;
65 }
66 if (component == 3) {
67 return descriptor.b_type;
68 }
69 break;
70 case TextureFormat::A2B10G10R10:
71 case TextureFormat::A4B4G4R4:
72 case TextureFormat::A5B5G5R1:
73 case TextureFormat::A1B5G5R5:
74 if (component == 0) {
75 return descriptor.a_type;
76 }
77 if (component == 1) {
78 return descriptor.b_type;
79 }
80 if (component == 2) {
81 return descriptor.g_type;
82 }
83 if (component == 3) {
84 return descriptor.r_type;
85 }
86 break;
87 case TextureFormat::R32_B24G8:
88 if (component == 0) {
89 return descriptor.r_type;
90 }
91 if (component == 1) {
92 return descriptor.b_type;
93 }
94 if (component == 2) {
95 return descriptor.g_type;
96 }
97 break;
98 case TextureFormat::B5G6R5:
99 case TextureFormat::B6G5R5:
100 case TextureFormat::B10G11R11:
101 if (component == 0) {
102 return descriptor.b_type;
103 }
104 if (component == 1) {
105 return descriptor.g_type;
106 }
107 if (component == 2) {
108 return descriptor.r_type;
109 }
110 break;
111 case TextureFormat::R24G8:
112 case TextureFormat::R8G24:
113 case TextureFormat::R8G8:
114 case TextureFormat::G4R4:
115 if (component == 0) {
116 return descriptor.g_type;
117 }
118 if (component == 1) {
119 return descriptor.r_type;
120 }
121 break;
122 default:
123 break;
124 }
125 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
126 return ComponentType::FLOAT;
127}
128
129bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
130 constexpr u8 R = 0b0001;
131 constexpr u8 G = 0b0010;
132 constexpr u8 B = 0b0100;
133 constexpr u8 A = 0b1000;
134 constexpr std::array<u8, 16> mask = {
135 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B),
136 (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
137 return std::bitset<4>{mask.at(component_mask)}.test(component);
138}
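// Note: the lookup table above maps every index to itself (mask[i] == i for
// all sixteen entries), so this is equivalent to testing bit `component` of
// component_mask directly; the table mostly documents which bit is which
// channel.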
139
140u32 GetComponentSize(TextureFormat format, std::size_t component) {
141 switch (format) {
142 case TextureFormat::R32G32B32A32:
143 return 32;
144 case TextureFormat::R16G16B16A16:
145 return 16;
146 case TextureFormat::R32G32B32:
147 return component <= 2 ? 32 : 0;
148 case TextureFormat::R32G32:
149 return component <= 1 ? 32 : 0;
150 case TextureFormat::R16G16:
151 return component <= 1 ? 16 : 0;
152 case TextureFormat::R32:
153 return component == 0 ? 32 : 0;
154 case TextureFormat::R16:
155 return component == 0 ? 16 : 0;
156 case TextureFormat::R8:
157 return component == 0 ? 8 : 0;
158 case TextureFormat::R1:
159 return component == 0 ? 1 : 0;
160 case TextureFormat::A8R8G8B8:
161 return 8;
162 case TextureFormat::A2B10G10R10:
163 return (component == 3 || component == 2 || component == 1) ? 10 : 2;
164 case TextureFormat::A4B4G4R4:
165 return 4;
166 case TextureFormat::A5B5G5R1:
167 return (component == 0 || component == 1 || component == 2) ? 5 : 1;
168 case TextureFormat::A1B5G5R5:
169 return (component == 1 || component == 2 || component == 3) ? 5 : 1;
170 case TextureFormat::R32_B24G8:
171 if (component == 0) {
172 return 32;
173 }
174 if (component == 1) {
175 return 24;
176 }
177 if (component == 2) {
178 return 8;
179 }
180 return 0;
181 case TextureFormat::B5G6R5:
182 if (component == 0 || component == 2) {
183 return 5;
184 }
185 if (component == 1) {
186 return 6;
187 }
188 return 0;
189 case TextureFormat::B6G5R5:
190 if (component == 1 || component == 2) {
191 return 5;
192 }
193 if (component == 0) {
194 return 6;
195 }
196 return 0;
197 case TextureFormat::B10G11R11:
198 if (component == 1 || component == 2) {
199 return 11;
200 }
201 if (component == 0) {
202 return 10;
203 }
204 return 0;
205 case TextureFormat::R24G8:
206 if (component == 0) {
207 return 8;
208 }
209 if (component == 1) {
210 return 24;
211 }
212 return 0;
213 case TextureFormat::R8G24:
214 if (component == 0) {
215 return 24;
216 }
217 if (component == 1) {
218 return 8;
219 }
220 return 0;
221 case TextureFormat::R8G8:
222 return (component == 0 || component == 1) ? 8 : 0;
223 case TextureFormat::G4R4:
224 return (component == 0 || component == 1) ? 4 : 0;
225 default:
226 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
227 return 0;
228 }
229}
230
231std::size_t GetImageComponentMask(TextureFormat format) {
232 constexpr u8 R = 0b0001;
233 constexpr u8 G = 0b0010;
234 constexpr u8 B = 0b0100;
235 constexpr u8 A = 0b1000;
236 switch (format) {
237 case TextureFormat::R32G32B32A32:
238 case TextureFormat::R16G16B16A16:
239 case TextureFormat::A8R8G8B8:
240 case TextureFormat::A2B10G10R10:
241 case TextureFormat::A4B4G4R4:
242 case TextureFormat::A5B5G5R1:
243 case TextureFormat::A1B5G5R5:
244 return std::size_t{R | G | B | A};
245 case TextureFormat::R32G32B32:
246 case TextureFormat::R32_B24G8:
247 case TextureFormat::B5G6R5:
248 case TextureFormat::B6G5R5:
249 case TextureFormat::B10G11R11:
250 return std::size_t{R | G | B};
251 case TextureFormat::R32G32:
252 case TextureFormat::R16G16:
253 case TextureFormat::R24G8:
254 case TextureFormat::R8G24:
255 case TextureFormat::R8G8:
256 case TextureFormat::G4R4:
257 return std::size_t{R | G};
258 case TextureFormat::R32:
259 case TextureFormat::R16:
260 case TextureFormat::R8:
261 case TextureFormat::R1:
262 return std::size_t{R};
263 default:
264 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
265 return std::size_t{R | G | B | A};
266 }
267}
268
269std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
270 switch (image_type) {
271 case Tegra::Shader::ImageType::Texture1D:
272 case Tegra::Shader::ImageType::TextureBuffer:
273 return 1;
274 case Tegra::Shader::ImageType::Texture1DArray:
275 case Tegra::Shader::ImageType::Texture2D:
276 return 2;
277 case Tegra::Shader::ImageType::Texture2DArray:
278 case Tegra::Shader::ImageType::Texture3D:
279 return 3;
280 }
281 UNREACHABLE();
282 return 1;
283}
284} // Anonymous namespace
285
286std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
287 Node original_value) {
288 switch (component_type) {
289 case ComponentType::SNORM: {
290 // range [-1.0, 1.0]
291 auto cnv_value = Operation(OperationCode::FMul, original_value,
292 Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
293 cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
294 return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
295 }
296 case ComponentType::SINT:
297 case ComponentType::UNORM: {
298 bool is_signed = component_type == ComponentType::SINT;
299        // UNORM maps into the range [0.0, 1.0]
300 auto cnv_value = Operation(OperationCode::FMul, original_value,
301 Immediate(static_cast<float>(1 << component_size) - 1.f));
302 return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
303 is_signed};
304 }
305 case ComponentType::UINT: // range [0, (1 << component_size) - 1]
306 return {std::move(original_value), false};
307 case ComponentType::FLOAT:
308 if (component_size == 16) {
309 return {Operation(OperationCode::HCastFloat, original_value), true};
310 } else {
311 return {std::move(original_value), true};
312 }
313 default:
314 UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
315 return {std::move(original_value), true};
316 }
317}
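// Worked example of the conversion scales above, assuming an 8-bit component:
//   UNORM: value * (2^8 - 1)     = value * 255.0f, then cast to unsigned
//   SNORM: value * (2^8 / 2 - 1) = value * 127.0f, then cast to signed and
//          truncated to 8 bits via BitfieldExtract
// so a sampled 1.0f becomes 0xff (UNORM) or 0x7f (SNORM).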
318
319u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
320 const Instruction instr = {program_code[pc]};
321 const auto opcode = OpCode::Decode(instr);
322
323 const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
324 std::vector<Node> coords;
325 const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
326 coords.reserve(num_coords);
327 for (std::size_t i = 0; i < num_coords; ++i) {
328 coords.push_back(GetRegister(instr.gpr8.Value() + i));
329 }
330 return coords;
331 };
332
333 switch (opcode->get().GetId()) {
334 case OpCode::Id::SULD: {
335 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
336 Tegra::Shader::OutOfBoundsStore::Ignore);
337
338 const auto type{instr.suldst.image_type};
339 auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
340 : GetBindlessImage(instr.gpr39, type)};
341 image.MarkRead();
342
343 if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
344 u32 indexer = 0;
345 for (u32 element = 0; element < 4; ++element) {
346 if (!instr.suldst.IsComponentEnabled(element)) {
347 continue;
348 }
349 MetaImage meta{image, {}, element};
350 Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
351 SetTemporary(bb, indexer++, std::move(value));
352 }
353 for (u32 i = 0; i < indexer; ++i) {
354 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
355 }
356 } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
357 UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
358 instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
359
360 auto descriptor = [this, instr] {
361 std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
362 if (instr.suldst.is_immediate) {
363 sampler_descriptor =
364 registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
365 } else {
366 const Node image_register = GetRegister(instr.gpr39);
367 const auto result = TrackCbuf(image_register, global_code,
368 static_cast<s64>(global_code.size()));
369 const auto buffer = std::get<1>(result);
370 const auto offset = std::get<2>(result);
371 sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
372 }
373 if (!sampler_descriptor) {
374 UNREACHABLE_MSG("Failed to obtain image descriptor");
375 }
376 return *sampler_descriptor;
377 }();
378
379 const auto comp_mask = GetImageComponentMask(descriptor.format);
380
381 switch (instr.suldst.GetStoreDataLayout()) {
382 case StoreType::Bits32:
383 case StoreType::Bits64: {
384 u32 indexer = 0;
385 u32 shifted_counter = 0;
386 Node value = Immediate(0);
387 for (u32 element = 0; element < 4; ++element) {
388 if (!IsComponentEnabled(comp_mask, element)) {
389 continue;
390 }
391 const auto component_type = GetComponentType(descriptor, element);
392 const auto component_size = GetComponentSize(descriptor.format, element);
393 MetaImage meta{image, {}, element};
394
395 auto [converted_value, is_signed] = GetComponentValue(
396 component_type, component_size,
397 Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
398
399 // shift element to correct position
400 const auto shifted = shifted_counter;
401 if (shifted > 0) {
402 converted_value =
403 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
404 std::move(converted_value), Immediate(shifted));
405 }
406 shifted_counter += component_size;
407
408 // add value into result
409 value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
410
411                    // Once a full 32-bit word has been accumulated, save it into a temporary
412 if (shifted_counter >= 32) {
413 SetTemporary(bb, indexer++, std::move(value));
414                        // Reset the counter and value to prepare packing the next word
415 value = Immediate(0);
416 shifted_counter = 0;
417 }
418 }
419 for (u32 i = 0; i < indexer; ++i) {
420 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
421 }
422 break;
423 }
424 default:
425 UNREACHABLE();
426 break;
427 }
428 }
429 break;
430 }
431 case OpCode::Id::SUST: {
432 UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
433 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
434 Tegra::Shader::OutOfBoundsStore::Ignore);
435 UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA
436
437 std::vector<Node> values;
438 constexpr std::size_t hardcoded_size{4};
439 for (std::size_t i = 0; i < hardcoded_size; ++i) {
440 values.push_back(GetRegister(instr.gpr0.Value() + i));
441 }
442
443 const auto type{instr.suldst.image_type};
444 auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
445 : GetBindlessImage(instr.gpr39, type)};
446 image.MarkWrite();
447
448 MetaImage meta{image, std::move(values)};
449 bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
450 break;
451 }
452 case OpCode::Id::SUATOM: {
453 UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
454
455 const OperationCode operation_code = [instr] {
456 switch (instr.suatom_d.operation_type) {
457 case Tegra::Shader::ImageAtomicOperationType::S32:
458 case Tegra::Shader::ImageAtomicOperationType::U32:
459 switch (instr.suatom_d.operation) {
460 case Tegra::Shader::ImageAtomicOperation::Add:
461 return OperationCode::AtomicImageAdd;
462 case Tegra::Shader::ImageAtomicOperation::And:
463 return OperationCode::AtomicImageAnd;
464 case Tegra::Shader::ImageAtomicOperation::Or:
465 return OperationCode::AtomicImageOr;
466 case Tegra::Shader::ImageAtomicOperation::Xor:
467 return OperationCode::AtomicImageXor;
468 case Tegra::Shader::ImageAtomicOperation::Exch:
469 return OperationCode::AtomicImageExchange;
470 default:
471 break;
472 }
473 break;
474 default:
475 break;
476 }
477 UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
478 static_cast<u64>(instr.suatom_d.operation.Value()),
479 static_cast<u64>(instr.suatom_d.operation_type.Value()));
480 return OperationCode::AtomicImageAdd;
481 }();
482
483 Node value = GetRegister(instr.gpr0);
484
485 const auto type = instr.suatom_d.image_type;
486 auto& image = GetImage(instr.image, type);
487 image.MarkAtomic();
488
489 MetaImage meta{image, {std::move(value)}};
490 SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
491 break;
492 }
493 default:
494 UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
495 }
496
497 return pc;
498}
499
500ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
501 const auto offset = static_cast<u32>(image.index.Value());
502
503 const auto it =
504 std::find_if(std::begin(used_images), std::end(used_images),
505 [offset](const ImageEntry& entry) { return entry.offset == offset; });
506 if (it != std::end(used_images)) {
507 ASSERT(!it->is_bindless && it->type == type);
508 return *it;
509 }
510
511 const auto next_index = static_cast<u32>(used_images.size());
512 return used_images.emplace_back(next_index, offset, type);
513}
514
515ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
516 const Node image_register = GetRegister(reg);
517 const auto result =
518 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
519
520 const auto buffer = std::get<1>(result);
521 const auto offset = std::get<2>(result);
522
523 const auto it = std::find_if(std::begin(used_images), std::end(used_images),
524 [buffer, offset](const ImageEntry& entry) {
525 return entry.buffer == buffer && entry.offset == offset;
526 });
527 if (it != std::end(used_images)) {
528 ASSERT(it->is_bindless && it->type == type);
529 return *it;
530 }
531
532 const auto next_index = static_cast<u32>(used_images.size());
533 return used_images.emplace_back(next_index, offset, buffer, type);
534}
535
536} // namespace VideoCommon::Shader
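The SULD D_BA path above re-packs individually converted components into 32-bit registers by shifting each one past the bits already accumulated and flushing a temporary every 32 bits. The same accumulation on plain integers (hypothetical host code; the component values and B5G6R5-style sizes are just an example):

#include <array>
#include <cstdint>
#include <cstdio>

int main() {
    // Raw component bit patterns and their widths in bits (5 + 6 + 5 = 16).
    const std::array<uint32_t, 3> components{0x1f, 0x2a, 0x11};
    const std::array<uint32_t, 3> sizes{5, 6, 5};

    uint32_t value = 0;
    uint32_t shifted = 0;
    for (int i = 0; i < 3; ++i) {
        value |= components[i] << shifted; // shift past what is already packed
        shifted += sizes[i];               // would flush to a temp at >= 32
    }
    std::printf("%08x\n", value); // 00008d5f
}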
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
deleted file mode 100644
index 59809bcd8..000000000
--- a/src/video_core/shader/decode/integer_set.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "video_core/engines/shader_bytecode.h"
7#include "video_core/shader/node_helper.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17
18 const Node op_a = GetRegister(instr.gpr8);
19 const Node op_b = [&]() {
20 if (instr.is_b_imm) {
21 return Immediate(instr.alu.GetSignedImm20_20());
22 } else if (instr.is_b_gpr) {
23 return GetRegister(instr.gpr20);
24 } else {
25 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
26 }
27 }();
28
29 // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
30 // is true, and to 0 otherwise.
31 const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
32 const Node first_pred =
33 GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
34
35 const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
36
37 const Node predicate = Operation(combiner, first_pred, second_pred);
38
39 const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
40 const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
41 const Node value =
42 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
43
44 SetRegister(bb, instr.gpr0, value);
45
46 return pc;
47}
48
49} // namespace VideoCommon::Shader
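The BF bit decides whether ISET above writes float or integer truth values: 1.0f/0.0f when set, otherwise integer -1 (an all-ones mask) and 0. A small sketch of the resulting register bit patterns, treating registers as untyped 32-bit storage:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    const bool bf = false;       // "boolean float" bit
    const bool predicate = true; // combined comparison result

    uint32_t reg;
    if (bf) {
        const float value = predicate ? 1.0f : 0.0f;
        std::memcpy(&reg, &value, sizeof(reg)); // bit-copy the float
    } else {
        reg = predicate ? 0xffffffffu : 0u;     // -1 is an all-ones mask
    }
    std::printf("%08x\n", reg); // ffffffff
}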
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
deleted file mode 100644
index 25e48fef8..000000000
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19
20 const Node op_a = GetRegister(instr.gpr8);
21
22 const Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return Immediate(instr.alu.GetSignedImm20_20());
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32    // We can't use the constant predicate as a destination.
33 ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
34
35 const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
36 const Node predicate =
37 GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
38
39 // Set the primary predicate to the result of Predicate OP SecondPredicate
40 const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
41 const Node value = Operation(combiner, predicate, second_pred);
42 SetPredicate(bb, instr.isetp.pred3, value);
43
44 if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
45 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
46 const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
47 SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
48 }
49
50 return pc;
51}
52
53} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
deleted file mode 100644
index 7728f600e..000000000
--- a/src/video_core/shader/decode/memory.cpp
+++ /dev/null
@@ -1,493 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
#include <tuple>
6#include <utility>
7#include <vector>
8
9#include <fmt/format.h>
10
11#include "common/alignment.h"
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "video_core/engines/shader_bytecode.h"
16#include "video_core/shader/node_helper.h"
17#include "video_core/shader/shader_ir.h"
18
19namespace VideoCommon::Shader {
20
21using std::move;
22using Tegra::Shader::AtomicOp;
23using Tegra::Shader::AtomicType;
24using Tegra::Shader::Attribute;
25using Tegra::Shader::GlobalAtomicType;
26using Tegra::Shader::Instruction;
27using Tegra::Shader::OpCode;
28using Tegra::Shader::Register;
29using Tegra::Shader::StoreType;
30
31namespace {
32
33OperationCode GetAtomOperation(AtomicOp op) {
34 switch (op) {
35 case AtomicOp::Add:
36 return OperationCode::AtomicIAdd;
37 case AtomicOp::Min:
38 return OperationCode::AtomicIMin;
39 case AtomicOp::Max:
40 return OperationCode::AtomicIMax;
41 case AtomicOp::And:
42 return OperationCode::AtomicIAnd;
43 case AtomicOp::Or:
44 return OperationCode::AtomicIOr;
45 case AtomicOp::Xor:
46 return OperationCode::AtomicIXor;
47 case AtomicOp::Exch:
48 return OperationCode::AtomicIExchange;
49 default:
50 UNIMPLEMENTED_MSG("op={}", op);
51 return OperationCode::AtomicIAdd;
52 }
53}
54
55bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
56 return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
57 uniform_type == Tegra::Shader::UniformType::UnsignedShort;
58}
59
60u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
61 switch (uniform_type) {
62 case Tegra::Shader::UniformType::UnsignedByte:
63 return 0b11;
64 case Tegra::Shader::UniformType::UnsignedShort:
65 return 0b10;
66 default:
67 UNREACHABLE();
68 return 0;
69 }
70}
71
72u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
73 switch (uniform_type) {
74 case Tegra::Shader::UniformType::UnsignedByte:
75 return 8;
76 case Tegra::Shader::UniformType::UnsignedShort:
77 return 16;
78 case Tegra::Shader::UniformType::Single:
79 return 32;
80 case Tegra::Shader::UniformType::Double:
81 return 64;
82 case Tegra::Shader::UniformType::Quad:
83 case Tegra::Shader::UniformType::UnsignedQuad:
84 return 128;
85 default:
86 UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
87 return 32;
88 }
89}
90
91Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
92 Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
93 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
94 return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
95}
96
97Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
98 Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
99 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
100 return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
101 Immediate(size));
102}
103
104Node Sign16Extend(Node value) {
105 Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
106 Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
107 Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
108 return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
109}
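// The three helpers above implement sub-word access on top of aligned 32-bit
// memory: (address & mask) selects the byte offset inside the word, the shift
// by 3 turns it into a bit offset, and the bitfield extract/insert moves
// `size` bits to or from that position. Sign16Extend then widens a 16-bit
// value by OR-ing in 0xFFFF0000 whenever bit 15 is set.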
110
111} // Anonymous namespace
112
113u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
114 const Instruction instr = {program_code[pc]};
115 const auto opcode = OpCode::Decode(instr);
116
117 switch (opcode->get().GetId()) {
118 case OpCode::Id::LD_A: {
119        // Note: Shouldn't this use flat interpolation mode, i.e. no interpolation performed?
120 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
121 "Indirect attribute loads are not supported");
122 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
123 "Unaligned attribute loads are not supported");
124 UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
125 instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
126 "Non-32 bits PHYS reads are not implemented");
127
128 const Node buffer{GetRegister(instr.gpr39)};
129
130 u64 next_element = instr.attribute.fmt20.element;
131 auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
132
133 const auto LoadNextElement = [&](u32 reg_offset) {
134 const Node attribute{instr.attribute.fmt20.IsPhysical()
135 ? GetPhysicalInputAttribute(instr.gpr8, buffer)
136 : GetInputAttribute(static_cast<Attribute::Index>(next_index),
137 next_element, buffer)};
138
139 SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
140
141 // Load the next attribute element into the following register. If the element
142 // to load goes beyond the vec4 size, load the first element of the next
143 // attribute.
144 next_element = (next_element + 1) % 4;
145 next_index = next_index + (next_element == 0 ? 1 : 0);
146 };
147
148 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
149 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
150 LoadNextElement(reg_offset);
151 }
152 break;
153 }
154 case OpCode::Id::LD_C: {
155 UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
156
157 Node index = GetRegister(instr.gpr8);
158
159 const Node op_a =
160 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
161
162 switch (instr.ld_c.type.Value()) {
163 case Tegra::Shader::UniformType::Single:
164 SetRegister(bb, instr.gpr0, op_a);
165 break;
166
167 case Tegra::Shader::UniformType::Double: {
168 const Node op_b =
169 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
170
171 SetTemporary(bb, 0, op_a);
172 SetTemporary(bb, 1, op_b);
173 SetRegister(bb, instr.gpr0, GetTemporary(0));
174 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
175 break;
176 }
177 default:
178 UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
179 }
180 break;
181 }
182 case OpCode::Id::LD_L:
183 LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
184 [[fallthrough]];
185 case OpCode::Id::LD_S: {
186 const auto GetAddress = [&](s32 offset) {
187 ASSERT(offset % 4 == 0);
188 const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
189 return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
190 };
191 const auto GetMemory = [&](s32 offset) {
192 return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
193 : GetLocalMemory(GetAddress(offset));
194 };
195
196 switch (instr.ldst_sl.type.Value()) {
197 case StoreType::Signed16:
198 SetRegister(bb, instr.gpr0,
199 Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
200 break;
201 case StoreType::Bits32:
202 case StoreType::Bits64:
203 case StoreType::Bits128: {
204 const u32 count = [&] {
205 switch (instr.ldst_sl.type.Value()) {
206 case StoreType::Bits32:
207 return 1;
208 case StoreType::Bits64:
209 return 2;
210 case StoreType::Bits128:
211 return 4;
212 default:
213 UNREACHABLE();
214 return 0;
215 }
216 }();
217 for (u32 i = 0; i < count; ++i) {
218 SetTemporary(bb, i, GetMemory(i * 4));
219 }
220 for (u32 i = 0; i < count; ++i) {
221 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
222 }
223 break;
224 }
225 default:
226 UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
227 instr.ldst_sl.type.Value());
228 }
229 break;
230 }
231 case OpCode::Id::LD:
232 case OpCode::Id::LDG: {
233 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
234 switch (opcode->get().GetId()) {
235 case OpCode::Id::LD:
236 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
237 return instr.generic.type;
238 case OpCode::Id::LDG:
239 return instr.ldg.type;
240 default:
241 UNREACHABLE();
242 return {};
243 }
244 }();
245
246 const auto [real_address_base, base_address, descriptor] =
247 TrackGlobalMemory(bb, instr, true, false);
248
249 const u32 size = GetMemorySize(type);
250 const u32 count = Common::AlignUp(size, 32) / 32;
251 if (!real_address_base || !base_address) {
252 // Tracking failed, load zeroes.
253 for (u32 i = 0; i < count; ++i) {
254 SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
255 }
256 break;
257 }
258
259 for (u32 i = 0; i < count; ++i) {
260 const Node it_offset = Immediate(i * 4);
261 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
262 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
263
264 // To handle unaligned loads get the bytes used to dereference global memory and extract
265 // those bytes from the loaded u32.
266 if (IsUnaligned(type)) {
267 gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
268 }
269
270 SetTemporary(bb, i, gmem);
271 }
272
273 for (u32 i = 0; i < count; ++i) {
274 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
275 }
276 break;
277 }
278 case OpCode::Id::ST_A: {
279 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
280 "Indirect attribute loads are not supported");
281 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
282 "Unaligned attribute loads are not supported");
283
284 u64 element = instr.attribute.fmt20.element;
285 auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
286
287 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
288 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
289 Node dest;
290 if (instr.attribute.fmt20.patch) {
291 const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
292 dest = MakeNode<PatchNode>(offset);
293 } else {
294 dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
295 GetRegister(instr.gpr39));
296 }
297 const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
298
299 bb.push_back(Operation(OperationCode::Assign, dest, src));
300
301 // Load the next attribute element into the following register. If the element to load
302 // goes beyond the vec4 size, load the first element of the next attribute.
303 element = (element + 1) % 4;
304 index = index + (element == 0 ? 1 : 0);
305 }
306 break;
307 }
308 case OpCode::Id::ST_L:
309 LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
310 [[fallthrough]];
311 case OpCode::Id::ST_S: {
312 const auto GetAddress = [&](s32 offset) {
313 ASSERT(offset % 4 == 0);
314 const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
315 return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
316 };
317
318 const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
319 const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
320 const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
321
322 switch (instr.ldst_sl.type.Value()) {
323 case StoreType::Bits128:
324 (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
325 (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
326 [[fallthrough]];
327 case StoreType::Bits64:
328 (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
329 [[fallthrough]];
330 case StoreType::Bits32:
331 (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
332 break;
333 case StoreType::Unsigned16:
334 case StoreType::Signed16: {
335 Node address = GetAddress(0);
336 Node memory = (this->*get_memory)(address);
337 (this->*set_memory)(
338 bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
339 break;
340 }
341 default:
342 UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
343 instr.ldst_sl.type.Value());
344 }
345 break;
346 }
347 case OpCode::Id::ST:
348 case OpCode::Id::STG: {
349 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
350 switch (opcode->get().GetId()) {
351 case OpCode::Id::ST:
352 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
353 return instr.generic.type;
354 case OpCode::Id::STG:
355 return instr.stg.type;
356 default:
357 UNREACHABLE();
358 return {};
359 }
360 }();
361
362        // Unaligned stores are read-modify-write, so we have to read memory as well.
363 const bool is_read = IsUnaligned(type);
364 const auto [real_address_base, base_address, descriptor] =
365 TrackGlobalMemory(bb, instr, is_read, true);
366 if (!real_address_base || !base_address) {
367 // Tracking failed, skip the store.
368 break;
369 }
370
371 const u32 size = GetMemorySize(type);
372 const u32 count = Common::AlignUp(size, 32) / 32;
373 for (u32 i = 0; i < count; ++i) {
374 const Node it_offset = Immediate(i * 4);
375 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
376 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
377 Node value = GetRegister(instr.gpr0.Value() + i);
378
379 if (IsUnaligned(type)) {
380 const u32 mask = GetUnalignedMask(type);
381 value = InsertUnaligned(gmem, move(value), real_address, mask, size);
382 }
383
384 bb.push_back(Operation(OperationCode::Assign, gmem, value));
385 }
386 break;
387 }
388 case OpCode::Id::RED: {
389 UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
390 instr.red.type.Value());
391 const auto [real_address, base_address, descriptor] =
392 TrackGlobalMemory(bb, instr, true, true);
393 if (!real_address || !base_address) {
394 // Tracking failed, skip atomic.
395 break;
396 }
397 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
398 Node value = GetRegister(instr.gpr0);
399 bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
400 break;
401 }
402 case OpCode::Id::ATOM: {
403 UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
404 instr.atom.operation == AtomicOp::Dec ||
405 instr.atom.operation == AtomicOp::SafeAdd,
406 "operation={}", instr.atom.operation.Value());
407 UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
408 instr.atom.type == GlobalAtomicType::U64 ||
409 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
410 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
411 "type={}", instr.atom.type.Value());
412
413 const auto [real_address, base_address, descriptor] =
414 TrackGlobalMemory(bb, instr, true, true);
415 if (!real_address || !base_address) {
416 // Tracking failed, skip atomic.
417 break;
418 }
419
420 const bool is_signed =
421 instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
422 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
423 SetRegister(bb, instr.gpr0,
424 SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
425 GetRegister(instr.gpr20)));
426 break;
427 }
428 case OpCode::Id::ATOMS: {
429 UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
430 instr.atoms.operation == AtomicOp::Dec,
431 "operation={}", instr.atoms.operation.Value());
432 UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
433 instr.atoms.type == AtomicType::U64,
434 "type={}", instr.atoms.type.Value());
435 const bool is_signed =
436 instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
437 const s32 offset = instr.atoms.GetImmediateOffset();
438 Node address = GetRegister(instr.gpr8);
439 address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
440 SetRegister(bb, instr.gpr0,
441 SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
442 GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
443 break;
444 }
445 case OpCode::Id::AL2P: {
446 // Ignore al2p.direction since we don't care about it.
447
448        // Calculate a fake physical address used for emulation.
449 const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
450 const Node reg{GetRegister(instr.gpr8)};
451 const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
452
453 // Set the fake address to target register.
454 SetRegister(bb, instr.gpr0, fake_address);
455
456 // Signal the shader IR to declare all possible attributes and varyings
457 uses_physical_attributes = true;
458 break;
459 }
460 default:
461 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
462 }
463
464 return pc;
465}
466
467std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
468 Instruction instr,
469 bool is_read, bool is_write) {
470 const auto addr_register{GetRegister(instr.gmem.gpr)};
471 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
472
473 const auto [base_address, index, offset] =
474 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
475 ASSERT_OR_EXECUTE_MSG(
476 base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
477 "Global memory tracking failed");
478
479 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
480
481 const GlobalMemoryBase descriptor{index, offset};
482 const auto& entry = used_global_memory.try_emplace(descriptor).first;
483 auto& usage = entry->second;
484 usage.is_written |= is_write;
485 usage.is_read |= is_read;
486
487 const auto real_address =
488 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
489
490 return {real_address, base_address, descriptor};
491}
492
493} // namespace VideoCommon::Shader
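Unaligned byte and short stores in the ST/STG path above are read-modify-write: the aligned word is loaded, the new bits are inserted at the computed offset, and the whole word is assigned back. The same insertion on plain integers (hypothetical host code mirroring InsertUnaligned):

#include <cstdint>
#include <cstdio>

// Insert `size` bits of `value` into `word` at the byte selected by
// (address & mask).
static uint32_t InsertBits(uint32_t word, uint32_t value, uint32_t address,
                           uint32_t mask, uint32_t size) {
    const uint32_t offset = (address & mask) * 8;
    const uint32_t field = ((size < 32 ? (1u << size) : 0u) - 1u) << offset;
    return (word & ~field) | ((value << offset) & field);
}

int main() {
    // Store the byte 0xab at address 0x1002 within the word 0x11223344.
    std::printf("%08x\n", InsertBits(0x11223344u, 0xabu, 0x1002u, 0b11u, 8));
    // Prints 11ab3344: byte 2 is replaced, the rest is written back unchanged.
}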
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
deleted file mode 100644
index 5f88537bc..000000000
--- a/src/video_core/shader/decode/other.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using std::move;
15using Tegra::Shader::ConditionCode;
16using Tegra::Shader::Instruction;
17using Tegra::Shader::IpaInterpMode;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::PixelImap;
20using Tegra::Shader::Register;
21using Tegra::Shader::SystemVariable;
22
23using Index = Tegra::Shader::Attribute::Index;
24
25u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
26 const Instruction instr = {program_code[pc]};
27 const auto opcode = OpCode::Decode(instr);
28
29 switch (opcode->get().GetId()) {
30 case OpCode::Id::NOP: {
31 UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
32 UNIMPLEMENTED_IF(instr.nop.trigger != 0);
33 // With the previous preconditions, this instruction is a no-operation.
34 break;
35 }
36 case OpCode::Id::EXIT: {
37 const ConditionCode cc = instr.flow_condition_code;
38 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
39
40 switch (instr.flow.cond) {
41 case Tegra::Shader::FlowCondition::Always:
42 bb.push_back(Operation(OperationCode::Exit));
43            if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
44 // If this is an unconditional exit then just end processing here,
45 // otherwise we have to account for the possibility of the condition
46 // not being met, so continue processing the next instruction.
47 pc = MAX_PROGRAM_LENGTH - 1;
48 }
49 break;
50
51 case Tegra::Shader::FlowCondition::Fcsm_Tr:
52            // TODO(bunnei): What is this used for? If we assume this condition is not
53            // satisfied, dual vertex shaders in Farming Simulator make more sense
54 UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
55 break;
56
57 default:
58 UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
59 }
60 break;
61 }
62 case OpCode::Id::KIL: {
63 UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
64
65 const ConditionCode cc = instr.flow_condition_code;
66 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
67
68 bb.push_back(Operation(OperationCode::Discard));
69 break;
70 }
71 case OpCode::Id::S2R: {
72 const Node value = [this, instr] {
73 switch (instr.sys20) {
74 case SystemVariable::LaneId:
75 return Operation(OperationCode::ThreadId);
76 case SystemVariable::InvocationId:
77 return Operation(OperationCode::InvocationId);
78 case SystemVariable::Ydirection:
79 uses_y_negate = true;
80 return Operation(OperationCode::YNegate);
81 case SystemVariable::InvocationInfo:
82 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
83 return Immediate(0x00ff'0000U);
84 case SystemVariable::WscaleFactorXY:
85 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
86 return Immediate(0U);
87 case SystemVariable::WscaleFactorZ:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
89 return Immediate(0U);
90 case SystemVariable::Tid: {
91 Node val = Immediate(0);
92 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
93 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
94 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
95 return val;
96 }
97 case SystemVariable::TidX:
98 return Operation(OperationCode::LocalInvocationIdX);
99 case SystemVariable::TidY:
100 return Operation(OperationCode::LocalInvocationIdY);
101 case SystemVariable::TidZ:
102 return Operation(OperationCode::LocalInvocationIdZ);
103 case SystemVariable::CtaIdX:
104 return Operation(OperationCode::WorkGroupIdX);
105 case SystemVariable::CtaIdY:
106 return Operation(OperationCode::WorkGroupIdY);
107 case SystemVariable::CtaIdZ:
108 return Operation(OperationCode::WorkGroupIdZ);
109 case SystemVariable::EqMask:
110 case SystemVariable::LtMask:
111 case SystemVariable::LeMask:
112 case SystemVariable::GtMask:
113 case SystemVariable::GeMask:
114 uses_warps = true;
115 switch (instr.sys20) {
116 case SystemVariable::EqMask:
117 return Operation(OperationCode::ThreadEqMask);
118 case SystemVariable::LtMask:
119 return Operation(OperationCode::ThreadLtMask);
120 case SystemVariable::LeMask:
121 return Operation(OperationCode::ThreadLeMask);
122 case SystemVariable::GtMask:
123 return Operation(OperationCode::ThreadGtMask);
124 case SystemVariable::GeMask:
125 return Operation(OperationCode::ThreadGeMask);
126 default:
127 UNREACHABLE();
128 return Immediate(0u);
129 }
130 default:
131 UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
132 return Immediate(0u);
133 }
134 }();
135 SetRegister(bb, instr.gpr0, value);
136
137 break;
138 }
139 case OpCode::Id::BRA: {
140 Node branch;
141 if (instr.bra.constant_buffer == 0) {
142 const u32 target = pc + instr.bra.GetBranchTarget();
143 branch = Operation(OperationCode::Branch, Immediate(target));
144 } else {
145 const u32 target = pc + 1;
146 const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
147 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
148 PRECISE, op_a, Immediate(3));
149 const Node operand =
150 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
151 branch = Operation(OperationCode::BranchIndirect, operand);
152 }
153
154 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
155 if (cc != Tegra::Shader::ConditionCode::T) {
156 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
157 } else {
158 bb.push_back(branch);
159 }
160 break;
161 }
162 case OpCode::Id::BRX: {
163 Node operand;
164 if (instr.brx.constant_buffer != 0) {
165 const s32 target = pc + 1;
166 const Node index = GetRegister(instr.gpr8);
167 const Node op_a =
168 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
169 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
170 PRECISE, op_a, Immediate(3));
171 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
172 } else {
173 const s32 target = pc + instr.brx.GetBranchExtend();
174 const Node op_a = GetRegister(instr.gpr8);
175 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
176 PRECISE, op_a, Immediate(3));
177 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
178 }
179 const Node branch = Operation(OperationCode::BranchIndirect, operand);
180
181 const ConditionCode cc = instr.flow_condition_code;
182 if (cc != ConditionCode::T) {
183 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
184 } else {
185 bb.push_back(branch);
186 }
187 break;
188 }
189 case OpCode::Id::SSY: {
190 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
191 "Constant buffer flow is not supported");
192
193 if (disable_flow_stack) {
194 break;
195 }
196
197 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
198 const u32 target = pc + instr.bra.GetBranchTarget();
199 bb.push_back(
200 Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
201 break;
202 }
203 case OpCode::Id::PBK: {
204 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
205 "Constant buffer PBK is not supported");
206
207 if (disable_flow_stack) {
208 break;
209 }
210
211        // PBK pushes onto a stack the address that BRK will later jump to.
212 const u32 target = pc + instr.bra.GetBranchTarget();
213 bb.push_back(
214 Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
215 break;
216 }
217 case OpCode::Id::SYNC: {
218 const ConditionCode cc = instr.flow_condition_code;
219 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
220
221 if (decompiled) {
222 break;
223 }
224
225 // The SYNC opcode jumps to the address previously set by the SSY opcode
226 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
227 break;
228 }
229 case OpCode::Id::BRK: {
230 const ConditionCode cc = instr.flow_condition_code;
231 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
232 if (decompiled) {
233 break;
234 }
235
236 // The BRK opcode jumps to the address previously set by the PBK opcode
237 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
238 break;
239 }
240 case OpCode::Id::IPA: {
241 const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
242 const auto attribute = instr.attribute.fmt28;
243 const Index index = attribute.index;
244
245 Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
246 : GetInputAttribute(index, attribute.element);
247
248 // Code taken from Ryujinx.
249 if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
250 const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
251 if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
252 Node position_w = GetInputAttribute(Index::Position, 3);
253 value = Operation(OperationCode::FMul, move(value), move(position_w));
254 }
255 }
256
257 if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
258 value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
259 }
260
261 value = GetSaturatedFloat(move(value), instr.ipa.saturate);
262
263 SetRegister(bb, instr.gpr0, move(value));
264 break;
265 }
266 case OpCode::Id::OUT_R: {
267 UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
268 "Stream buffer is not supported");
269
270 if (instr.out.emit) {
271 // gpr0 is used to store the next address and gpr8 contains the address to emit.
272            // Hardware uses pointers here, but we just ignore them.
273 bb.push_back(Operation(OperationCode::EmitVertex));
274 SetRegister(bb, instr.gpr0, Immediate(0));
275 }
276 if (instr.out.cut) {
277 bb.push_back(Operation(OperationCode::EndPrimitive));
278 }
279 break;
280 }
281 case OpCode::Id::ISBERD: {
282 UNIMPLEMENTED_IF(instr.isberd.o != 0);
283 UNIMPLEMENTED_IF(instr.isberd.skew != 0);
284 UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
285 UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
286 LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
287 SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
288 break;
289 }
290 case OpCode::Id::BAR: {
291 UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
292 bb.push_back(Operation(OperationCode::Barrier));
293 break;
294 }
295 case OpCode::Id::MEMBAR: {
296 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
297 const OperationCode type = [instr] {
298 switch (instr.membar.type) {
299 case Tegra::Shader::MembarType::CTA:
300 return OperationCode::MemoryBarrierGroup;
301 case Tegra::Shader::MembarType::GL:
302 return OperationCode::MemoryBarrierGlobal;
303 default:
304 UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
305 return OperationCode::MemoryBarrierGlobal;
306 }
307 }();
308 bb.push_back(Operation(type));
309 break;
310 }
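The two barrier codes differ only in scope. A plausible lowering in a GLSL-emitting backend (an illustrative mapping, not the decompiler's actual code) would be:

```cpp
#include <string_view>

enum class BarrierScope { Workgroup, Device }; // stand-ins for MemoryBarrierGroup/Global

// CTA barriers only need to order memory within a workgroup; GL barriers
// must be visible device-wide.
constexpr std::string_view LowerBarrier(BarrierScope scope) {
    return scope == BarrierScope::Workgroup ? "groupMemoryBarrier()" : "memoryBarrier()";
}
```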
311 case OpCode::Id::DEPBAR: {
312 LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
313 break;
314 }
315 default:
316 UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
317 }
318
319 return pc;
320}
321
322} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
deleted file mode 100644
index 9290d22eb..000000000
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 switch (opcode->get().GetId()) {
22 case OpCode::Id::PSETP: {
23 const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
24 const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
25
26 // We can't use the constant predicate as destination.
27 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
28
29 const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
30
31 const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
32 const Node predicate = Operation(combiner, op_a, op_b);
33
34 // Set the primary predicate to the result of Predicate OP SecondPredicate
35 SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
36
37 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
38 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
39 // enabled
40 SetPredicate(bb, instr.psetp.pred0,
41 Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
42 second_pred));
43 }
44 break;
45 }
46 case OpCode::Id::CSETP: {
47 const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
48 const Node condition_code = GetConditionCode(instr.csetp.cc);
49
50 const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
51
52 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
53 SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
54 }
55 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
56 const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
57 SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
58 }
59 break;
60 }
61 default:
62 UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
63 }
64
65 return pc;
66}
67
68} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
deleted file mode 100644
index 84dbc50fe..000000000
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18
19 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
20 "Condition code generation in PSET is not implemented");
21
22 const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
23 const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
24 const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);
25
26 const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
27
28 const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
29 const Node predicate = Operation(combiner, first_pred, second_pred);
30
31 const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
32 const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
33 const Node value =
34 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
35
36 if (instr.pset.bf) {
37 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
38 } else {
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 }
41 SetRegister(bb, instr.gpr0, value);
42
43 return pc;
44}
45
46} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
deleted file mode 100644
index 6116c31aa..000000000
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/node_helper.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace VideoCommon::Shader {
14
15using std::move;
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18
19namespace {
20constexpr u64 NUM_CONDITION_CODES = 4;
21constexpr u64 NUM_PREDICATES = 7;
22} // namespace
23
24u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
25 const Instruction instr = {program_code[pc]};
26 const auto opcode = OpCode::Decode(instr);
27
28 Node apply_mask = [this, opcode, instr] {
29 switch (opcode->get().GetId()) {
30 case OpCode::Id::R2P_IMM:
31 case OpCode::Id::P2R_IMM:
32 return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
33 default:
34 UNREACHABLE();
35 return Immediate(0);
36 }
37 }();
38
39 const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
40
41 const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
42 const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
43 const auto get_entry = [this, cc](u64 entry) {
44 return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
45 };
46
47 switch (opcode->get().GetId()) {
48 case OpCode::Id::R2P_IMM: {
49 Node mask = GetRegister(instr.gpr8);
50
51 for (u64 entry = 0; entry < num_entries; ++entry) {
52 const u32 shift = static_cast<u32>(entry);
53
54 Node apply = BitfieldExtract(apply_mask, shift, 1);
55 Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
56
57 Node compare = BitfieldExtract(mask, offset + shift, 1);
58 Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
59
60 Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
61 bb.push_back(Conditional(condition, {move(code)}));
62 }
63 break;
64 }
65 case OpCode::Id::P2R_IMM: {
66 Node value = Immediate(0);
67 for (u64 entry = 0; entry < num_entries; ++entry) {
68 Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
69 Immediate(0));
70 value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
71 }
72 value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
73 value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
74
75 SetRegister(bb, instr.gpr0, move(value));
76 break;
77 }
78 default:
79 UNIMPLEMENTED_MSG("Unhandled P2R/R2P instruction: {}", opcode->get().GetName());
80 break;
81 }
82
83 return pc;
84}
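Stripped of the IR plumbing, the two opcodes are inverse bitfield moves between a register and the predicate file. A self-contained model with plain integers (illustrative, not the emulator's types):

```cpp
#include <array>
#include <cstdint>

// R2P_IMM: for every predicate selected by apply_mask, load its new value
// from bit (offset + i) of the source register.
void R2P(std::array<bool, 7>& preds, uint32_t reg, uint32_t apply_mask, uint32_t offset) {
    for (uint32_t i = 0; i < preds.size(); ++i) {
        if ((apply_mask >> i) & 1) {
            preds[i] = ((reg >> (offset + i)) & 1) != 0;
        }
    }
}

// P2R_IMM: pack the predicates into a byte, apply the mask, and insert the
// byte at bit `offset` of the destination register.
uint32_t P2R(const std::array<bool, 7>& preds, uint32_t reg, uint32_t apply_mask,
             uint32_t offset) {
    uint32_t value = 0;
    for (uint32_t i = 0; i < preds.size(); ++i) {
        value |= (preds[i] ? 1u : 0u) << i;
    }
    value &= apply_mask;
    const uint32_t field_mask = 0xffu << offset; // 8-bit insert window
    return (reg & ~field_mask) | ((value << offset) & field_mask);
}
```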
85
86} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
deleted file mode 100644
index a53819c15..000000000
--- a/src/video_core/shader/decode/shift.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using std::move;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::ShfType;
17using Tegra::Shader::ShfXmode;
18
19namespace {
20
21Node IsFull(Node shift) {
22 return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32));
23}
24
25Node Shift(OperationCode opcode, Node value, Node shift) {
26 Node shifted = Operation(opcode, move(value), shift);
27 return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
28}
29
30Node ClampShift(Node shift, s32 size = 32) {
31 shift = Operation(OperationCode::IMax, move(shift), Immediate(0));
32 return Operation(OperationCode::IMin, move(shift), Immediate(size));
33}
34
35Node WrapShift(Node shift, s32 size = 32) {
36 return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1));
37}
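The two policies only differ for out-of-range shift amounts: clamping saturates into [0, size] while wrapping keeps the low bits. With plain 32-bit integers:

```cpp
#include <algorithm>
#include <cstdint>

int32_t Clamp32(int32_t shift) { return std::clamp(shift, 0, 32); } // default policy
int32_t Wrap32(int32_t shift) { return shift & 31; }                // .W variant

// Clamp32(40) == 32 (the Shift helper then selects 0), Wrap32(40) == 8,
// Clamp32(-3) == 0 while Wrap32(-3) == 29.
```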
38
39Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
40 // These values are used when the shift value is less than 32
41 Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
42 Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
43 Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));
44
45 if (type == ShfType::Bits32) {
46 // On 32-bit shifts we are either full (shifting by 32) or shifting by fewer than 32 bits
47 return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
48 }
49
50 // And these when the shift is 32 or larger
51 const bool is_signed = type == ShfType::S64;
52 const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
53 Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
54 Node greater = Shift(opcode, high, move(reduced));
55
56 Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
57 Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
58
59 Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
60 return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
61}
62
63Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
64 // These values are used when the shift value is less than 32
65 Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift);
66 Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
67 Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));
68
69 if (type == ShfType::Bits32) {
70 // On 32-bit shifts we are either full (shifting by 32) or shifting by fewer than 32 bits
71 return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
72 }
73
74 // And these when the shift is 32 or larger
75 Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
76 Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));
77
78 Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
79 Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
80
81 Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
82 return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
83}
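Conceptually, both helpers compute a 64-bit funnel shift over the {high:low} register pair; the Select chains exist because shifting a 32-bit value by 32 (or by 0 via the 32 - shift path) is undefined on the host. For a pre-clamped, non-zero shift the computation reduces to plain 64-bit arithmetic (a reference model; the IR additionally special-cases shift == 0):

```cpp
#include <cstdint>

// SHF_RIGHT (logical, 64-bit type): low 32 bits of {high:low} >> shift.
uint32_t FunnelShiftRight(uint32_t low, uint32_t high, uint32_t shift) {
    const uint64_t pair = (static_cast<uint64_t>(high) << 32) | low;
    return static_cast<uint32_t>(pair >> shift); // valid for shift in [1, 63]
}

// SHF_LEFT (64-bit type): high 32 bits of {high:low} << shift.
uint32_t FunnelShiftLeft(uint32_t low, uint32_t high, uint32_t shift) {
    const uint64_t pair = (static_cast<uint64_t>(high) << 32) | low;
    return static_cast<uint32_t>((pair << shift) >> 32); // valid for shift in [1, 63]
}
```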
84
85} // Anonymous namespace
86
87u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
88 const Instruction instr = {program_code[pc]};
89 const auto opcode = OpCode::Decode(instr);
90
91 Node op_a = GetRegister(instr.gpr8);
92 Node op_b = [this, instr] {
93 if (instr.is_b_imm) {
94 return Immediate(instr.alu.GetSignedImm20_20());
95 } else if (instr.is_b_gpr) {
96 return GetRegister(instr.gpr20);
97 } else {
98 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
99 }
100 }();
101
102 switch (const auto opid = opcode->get().GetId(); opid) {
103 case OpCode::Id::SHR_C:
104 case OpCode::Id::SHR_R:
105 case OpCode::Id::SHR_IMM: {
106 op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b));
107
108 Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
109 move(op_a), move(op_b));
110 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
111 SetRegister(bb, instr.gpr0, move(value));
112 break;
113 }
114 case OpCode::Id::SHL_C:
115 case OpCode::Id::SHL_R:
116 case OpCode::Id::SHL_IMM: {
117 Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
118 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
119 SetRegister(bb, instr.gpr0, move(value));
120 break;
121 }
122 case OpCode::Id::SHF_RIGHT_R:
123 case OpCode::Id::SHF_RIGHT_IMM:
124 case OpCode::Id::SHF_LEFT_R:
125 case OpCode::Id::SHF_LEFT_IMM: {
126 UNIMPLEMENTED_IF(instr.generates_cc);
127 UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
128 instr.shf.xmode.Value());
129
130 if (instr.is_b_imm) {
131 op_b = Immediate(static_cast<u32>(instr.shf.immediate));
132 }
133 const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
134 Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size);
135
136 Node negated_shift = Operation(OperationCode::INegate, shift);
137 Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));
138
139 const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
140 Node value = (is_right ? ShiftRight : ShiftLeft)(
141 move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type);
142
143 SetRegister(bb, instr.gpr0, move(value));
144 break;
145 }
146 default:
147 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
148 }
149
150 return pc;
151}
152
153} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
deleted file mode 100644
index c69681e8d..000000000
--- a/src/video_core/shader/decode/texture.cpp
+++ /dev/null
@@ -1,935 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Instruction;
21using Tegra::Shader::OpCode;
22using Tegra::Shader::Register;
23using Tegra::Shader::TextureMiscMode;
24using Tegra::Shader::TextureProcessMode;
25using Tegra::Shader::TextureType;
26
27static std::size_t GetCoordCount(TextureType texture_type) {
28 switch (texture_type) {
29 case TextureType::Texture1D:
30 return 1;
31 case TextureType::Texture2D:
32 return 2;
33 case TextureType::Texture3D:
34 case TextureType::TextureCube:
35 return 3;
36 default:
37 UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
38 return 0;
39 }
40}
41
42u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
43 const Instruction instr = {program_code[pc]};
44 const auto opcode = OpCode::Decode(instr);
45 bool is_bindless = false;
46 switch (opcode->get().GetId()) {
47 case OpCode::Id::TEX: {
48 const TextureType texture_type{instr.tex.texture_type};
49 const bool is_array = instr.tex.array != 0;
50 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
51 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
52 const auto process_mode = instr.tex.GetTextureProcessMode();
53 WriteTexInstructionFloat(
54 bb, instr,
55 GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
56 break;
57 }
58 case OpCode::Id::TEX_B: {
59 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
60 "AOFFI is not implemented");
61
62 const TextureType texture_type{instr.tex_b.texture_type};
63 const bool is_array = instr.tex_b.array != 0;
64 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
65 const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
66 const auto process_mode = instr.tex_b.GetTextureProcessMode();
67 WriteTexInstructionFloat(bb, instr,
68 GetTexCode(instr, texture_type, process_mode, depth_compare,
69 is_array, is_aoffi, {instr.gpr20}));
70 break;
71 }
72 case OpCode::Id::TEXS: {
73 const TextureType texture_type{instr.texs.GetTextureType()};
74 const bool is_array{instr.texs.IsArrayTexture()};
75 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
76 const auto process_mode = instr.texs.GetTextureProcessMode();
77
78 const Node4 components =
79 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
80
81 if (instr.texs.fp32_flag) {
82 WriteTexsInstructionFloat(bb, instr, components);
83 } else {
84 WriteTexsInstructionHalfFloat(bb, instr, components);
85 }
86 break;
87 }
88 case OpCode::Id::TLD4_B: {
89 is_bindless = true;
90 [[fallthrough]];
91 }
92 case OpCode::Id::TLD4: {
93 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
94 "NDV is not implemented");
95 const auto texture_type = instr.tld4.texture_type.Value();
96 const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
97 : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
98 const bool is_array = instr.tld4.array != 0;
99 const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
100 : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
101 const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
102 : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
103 WriteTexInstructionFloat(bb, instr,
104 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
105 is_ptp, is_bindless));
106 break;
107 }
108 case OpCode::Id::TLD4S: {
109 constexpr std::size_t num_coords = 2;
110 const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
111 const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
112 const Node op_a = GetRegister(instr.gpr8);
113 const Node op_b = GetRegister(instr.gpr20);
114
115 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
116 std::vector<Node> coords;
117 std::vector<Node> aoffi;
118 Node depth_compare;
119 if (is_depth_compare) {
120 // Note: TLD4S coordinate encoding works just like TEXS's
121 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
122 coords.push_back(op_a);
123 coords.push_back(op_y);
124 if (is_aoffi) {
125 aoffi = GetAoffiCoordinates(op_b, num_coords, true);
126 depth_compare = GetRegister(instr.gpr20.Value() + 1);
127 } else {
128 depth_compare = op_b;
129 }
130 } else {
131 // There's no depth compare
132 coords.push_back(op_a);
133 if (is_aoffi) {
134 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
135 aoffi = GetAoffiCoordinates(op_b, num_coords, true);
136 } else {
137 coords.push_back(op_b);
138 }
139 }
140 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
141
142 SamplerInfo info;
143 info.is_shadow = is_depth_compare;
144 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
145
146 Node4 values;
147 for (u32 element = 0; element < values.size(); ++element) {
148 MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {},
149 {}, {}, component, element, {}};
150 values[element] = Operation(OperationCode::TextureGather, meta, coords);
151 }
152
153 if (instr.tld4s.fp16_flag) {
154 WriteTexsInstructionHalfFloat(bb, instr, values, true);
155 } else {
156 WriteTexsInstructionFloat(bb, instr, values, true);
157 }
158 break;
159 }
160 case OpCode::Id::TXD_B:
161 is_bindless = true;
162 [[fallthrough]];
163 case OpCode::Id::TXD: {
164 UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
165 "AOFFI is not implemented");
166
167 const bool is_array = instr.txd.is_array != 0;
168 const auto derivate_reg = instr.gpr20.Value();
169 const auto texture_type = instr.txd.texture_type.Value();
170 const auto coord_count = GetCoordCount(texture_type);
171 u64 base_reg = instr.gpr8.Value();
172 Node index_var;
173 SamplerInfo info;
174 info.type = texture_type;
175 info.is_array = is_array;
176 const std::optional<SamplerEntry> sampler =
177 is_bindless ? GetBindlessSampler(base_reg, info, index_var)
178 : GetSampler(instr.sampler, info);
179 Node4 values;
180 if (!sampler) {
181 std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
182 WriteTexInstructionFloat(bb, instr, values);
183 break;
184 }
185
186 if (is_bindless) {
187 base_reg++;
188 }
189
190 std::vector<Node> coords;
191 std::vector<Node> derivates;
192 for (std::size_t i = 0; i < coord_count; ++i) {
193 coords.push_back(GetRegister(base_reg + i));
194 const std::size_t derivate = i * 2;
195 derivates.push_back(GetRegister(derivate_reg + derivate));
196 derivates.push_back(GetRegister(derivate_reg + derivate + 1));
197 }
198
199 Node array_node = {};
200 if (is_array) {
201 const Node info_reg = GetRegister(base_reg + coord_count);
202 array_node = BitfieldExtract(info_reg, 0, 16);
203 }
204
205 for (u32 element = 0; element < values.size(); ++element) {
206 MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
207 {}, {}, {}, element, index_var};
208 values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
209 }
210
211 WriteTexInstructionFloat(bb, instr, values);
212
213 break;
214 }
215 case OpCode::Id::TXQ_B:
216 is_bindless = true;
217 [[fallthrough]];
218 case OpCode::Id::TXQ: {
219 Node index_var;
220 const std::optional<SamplerEntry> sampler =
221 is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
222 : GetSampler(instr.sampler, {});
223
224 if (!sampler) {
225 u32 indexer = 0;
226 for (u32 element = 0; element < 4; ++element) {
227 if (!instr.txq.IsComponentEnabled(element)) {
228 continue;
229 }
230 const Node value = Immediate(0);
231 SetTemporary(bb, indexer++, value);
232 }
233 for (u32 i = 0; i < indexer; ++i) {
234 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
235 }
236 break;
237 }
238
239 u32 indexer = 0;
240 switch (instr.txq.query_type) {
241 case Tegra::Shader::TextureQueryType::Dimension: {
242 for (u32 element = 0; element < 4; ++element) {
243 if (!instr.txq.IsComponentEnabled(element)) {
244 continue;
245 }
246 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
247 const Node value =
248 Operation(OperationCode::TextureQueryDimensions, meta,
249 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
250 SetTemporary(bb, indexer++, value);
251 }
252 for (u32 i = 0; i < indexer; ++i) {
253 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
254 }
255 break;
256 }
257 default:
258 UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
259 }
260 break;
261 }
262 case OpCode::Id::TMML_B:
263 is_bindless = true;
264 [[fallthrough]];
265 case OpCode::Id::TMML: {
266 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
267 "NDV is not implemented");
268
269 const auto texture_type = instr.tmml.texture_type.Value();
270 const bool is_array = instr.tmml.array != 0;
271 SamplerInfo info;
272 info.type = texture_type;
273 info.is_array = is_array;
274 Node index_var;
275 const std::optional<SamplerEntry> sampler =
276 is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
277 : GetSampler(instr.sampler, info);
278
279 if (!sampler) {
280 u32 indexer = 0;
281 for (u32 element = 0; element < 2; ++element) {
282 if (!instr.tmml.IsComponentEnabled(element)) {
283 continue;
284 }
285 const Node value = Immediate(0);
286 SetTemporary(bb, indexer++, value);
287 }
288 for (u32 i = 0; i < indexer; ++i) {
289 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
290 }
291 break;
292 }
293
294 const u64 base_index = is_array ? 1 : 0;
295 const u64 num_components = [texture_type] {
296 switch (texture_type) {
297 case TextureType::Texture1D:
298 return 1;
299 case TextureType::Texture2D:
300 return 2;
301 case TextureType::TextureCube:
302 return 3;
303 default:
304 UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
305 return 2;
306 }
307 }();
308 // TODO: What's the array component used for?
309
310 std::vector<Node> coords;
311 coords.reserve(num_components);
312 for (u64 component = 0; component < num_components; ++component) {
313 coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
314 }
315
316 u32 indexer = 0;
317 for (u32 element = 0; element < 2; ++element) {
318 if (!instr.tmml.IsComponentEnabled(element)) {
319 continue;
320 }
321 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
322 Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
323 SetTemporary(bb, indexer++, std::move(value));
324 }
325 for (u32 i = 0; i < indexer; ++i) {
326 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
327 }
328 break;
329 }
330 case OpCode::Id::TLD: {
331 UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
332 UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
333 UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
334
335 WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
336 break;
337 }
338 case OpCode::Id::TLDS: {
339 const TextureType texture_type{instr.tlds.GetTextureType()};
340 const bool is_array{instr.tlds.IsArrayTexture()};
341
342 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
343 "AOFFI is not implemented");
344 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
345
346 const Node4 components = GetTldsCode(instr, texture_type, is_array);
347
348 if (instr.tlds.fp32_flag) {
349 WriteTexsInstructionFloat(bb, instr, components);
350 } else {
351 WriteTexsInstructionHalfFloat(bb, instr, components);
352 }
353 break;
354 }
355 default:
356 UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName());
357 }
358
359 return pc;
360}
361
362ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
363 SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
364 if (info.IsComplete()) {
365 return info;
366 }
367 if (!sampler) {
368 LOG_WARNING(HW_GPU, "Unknown sampler info");
369 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
370 info.is_array = info.is_array.value_or(false);
371 info.is_shadow = info.is_shadow.value_or(false);
372 info.is_buffer = info.is_buffer.value_or(false);
373 return info;
374 }
375 info.type = info.type.value_or(sampler->texture_type);
376 info.is_array = info.is_array.value_or(sampler->is_array != 0);
377 info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0);
378 info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0);
379 return info;
380}
381
382std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
383 SamplerInfo sampler_info) {
384 const u32 offset = static_cast<u32>(sampler.index.Value());
385 const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
386
387 // If this sampler has already been used, return the existing mapping.
388 const auto it =
389 std::find_if(used_samplers.begin(), used_samplers.end(),
390 [offset](const SamplerEntry& entry) { return entry.offset == offset; });
391 if (it != used_samplers.end()) {
392 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
393 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
394 return *it;
395 }
396
397 // Otherwise create a new mapping for this sampler
398 const auto next_index = static_cast<u32>(used_samplers.size());
399 return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array,
400 *info.is_shadow, *info.is_buffer, false);
401}
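GetSampler and the bindless variants below all follow the same find-or-create discipline over used_samplers, which keeps sampler indices stable across repeated uses. Reduced to its essence (hypothetical minimal types):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct Entry {
    uint32_t index;  // stable position handed to the backends
    uint32_t offset; // lookup key: constant buffer offset
};

Entry FindOrCreate(std::vector<Entry>& used, uint32_t offset) {
    const auto it = std::find_if(used.begin(), used.end(),
                                 [offset](const Entry& entry) { return entry.offset == offset; });
    if (it != used.end()) {
        return *it; // reuse the existing mapping
    }
    const auto index = static_cast<uint32_t>(used.size());
    return used.emplace_back(Entry{index, offset});
}
```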
402
403std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
404 SamplerInfo info, Node& index_var) {
405 const Node sampler_register = GetRegister(reg);
406 const auto [base_node, tracked_sampler_info] =
407 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
408 if (!base_node) {
409 UNREACHABLE();
410 return std::nullopt;
411 }
412
413 if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
414 const u32 buffer = sampler_info->index;
415 const u32 offset = sampler_info->offset;
416 info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
417
418 // If this sampler has already been used, return the existing mapping.
419 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
420 [buffer, offset](const SamplerEntry& entry) {
421 return entry.buffer == buffer && entry.offset == offset;
422 });
423 if (it != used_samplers.end()) {
424 ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
425 it->is_shadow == info.is_shadow);
426 return *it;
427 }
428
429 // Otherwise create a new mapping for this sampler
430 const auto next_index = static_cast<u32>(used_samplers.size());
431 return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
432 *info.is_shadow, *info.is_buffer, false);
433 }
434 if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
435 const std::pair indices = sampler_info->indices;
436 const std::pair offsets = sampler_info->offsets;
437 info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
438
439 // Try to use an already created sampler if it exists
440 const auto it =
441 std::find_if(used_samplers.begin(), used_samplers.end(),
442 [indices, offsets](const SamplerEntry& entry) {
443 return offsets == std::pair{entry.offset, entry.secondary_offset} &&
444 indices == std::pair{entry.buffer, entry.secondary_buffer};
445 });
446 if (it != used_samplers.end()) {
447 ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
448 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
449 return *it;
450 }
451
452 // Otherwise create a new mapping for this sampler
453 const u32 next_index = static_cast<u32>(used_samplers.size());
454 return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
455 *info.is_shadow, *info.is_buffer);
456 }
457 if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
458 const u32 base_offset = sampler_info->base_offset / 4;
459 index_var = GetCustomVariable(sampler_info->bindless_var);
460 info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
461
462 // If this sampler has already been used, return the existing mapping.
463 const auto it = std::find_if(
464 used_samplers.begin(), used_samplers.end(),
465 [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
466 if (it != used_samplers.end()) {
467 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
468 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
469 it->is_indexed);
470 return *it;
471 }
472
473 uses_indexed_samplers = true;
474 // Otherwise create a new mapping for this sampler
475 const auto next_index = static_cast<u32>(used_samplers.size());
476 return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
477 *info.is_shadow, *info.is_buffer, true);
478 }
479 return std::nullopt;
480}
481
482void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
483 u32 dest_elem = 0;
484 for (u32 elem = 0; elem < 4; ++elem) {
485 if (!instr.tex.IsComponentEnabled(elem)) {
486 // Skip disabled components
487 continue;
488 }
489 SetTemporary(bb, dest_elem++, components[elem]);
490 }
491 // After writing the values to temporaries, move them to the real registers
492 for (u32 i = 0; i < dest_elem; ++i) {
493 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
494 }
495}
496
497void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
498 bool ignore_mask) {
499 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
500 // go into gpr0+0 and gpr0+1, and the rest go into gpr28+0 and gpr28+1
501
502 u32 dest_elem = 0;
503 for (u32 component = 0; component < 4; ++component) {
504 if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
505 continue;
506 SetTemporary(bb, dest_elem++, components[component]);
507 }
508
509 for (u32 i = 0; i < dest_elem; ++i) {
510 if (i < 2) {
511 // Write the first two swizzle components to gpr0 and gpr0+1
512 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
513 } else {
514 ASSERT(instr.texs.HasTwoDestinations());
515 // Write the rest of the swizzle components to gpr28 and gpr28+1
516 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
517 }
518 }
519}
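With all four swizzle components enabled, the compacted values land in gpr0, gpr0+1, gpr28 and gpr28+1 in that order. A plain-array model of the write-out loop above:

```cpp
#include <array>
#include <cstdint>

void WriteTexsDestinations(std::array<uint32_t, 256>& regs, uint32_t gpr0, uint32_t gpr28,
                           const std::array<uint32_t, 4>& compacted, uint32_t count) {
    for (uint32_t i = 0; i < count; ++i) {
        const uint32_t base = i < 2 ? gpr0 : gpr28; // first pair, then second pair
        regs[base + i % 2] = compacted[i];
    }
}
```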
520
521void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
522 const Node4& components, bool ignore_mask) {
523 // TEXS.F16 destination values are packed in pairs into two registers (just like any
524 // half-float instruction).
525
526 Node4 values;
527 u32 dest_elem = 0;
528 for (u32 component = 0; component < 4; ++component) {
529 if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
530 continue;
531 values[dest_elem++] = components[component];
532 }
533 if (dest_elem == 0)
534 return;
535
536 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
537
538 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
539 if (dest_elem <= 2) {
540 SetRegister(bb, instr.gpr0, first_value);
541 return;
542 }
543
544 SetTemporary(bb, 0, first_value);
545 SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
546
547 SetRegister(bb, instr.gpr0, GetTemporary(0));
548 SetRegister(bb, instr.gpr28, GetTemporary(1));
549}
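HPack2 packs two half floats into a single 32-bit register, first value in the low bits (the same ordering as GLSL's packHalf2x16). At the bit level, assuming the inputs are already IEEE-754 binary16:

```cpp
#include <cstdint>

uint32_t HPack2(uint16_t low_half, uint16_t high_half) {
    return static_cast<uint32_t>(low_half) | (static_cast<uint32_t>(high_half) << 16);
}
```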
550
551Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
552 TextureProcessMode process_mode, std::vector<Node> coords,
553 Node array, Node depth_compare, u32 bias_offset,
554 std::vector<Node> aoffi,
555 std::optional<Tegra::Shader::Register> bindless_reg) {
556 const bool is_array = array != nullptr;
557 const bool is_shadow = depth_compare != nullptr;
558 const bool is_bindless = bindless_reg.has_value();
559
560 ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
561 "Illegal texture type");
562
563 SamplerInfo info;
564 info.type = texture_type;
565 info.is_array = is_array;
566 info.is_shadow = is_shadow;
567 info.is_buffer = false;
568
569 Node index_var;
570 const std::optional<SamplerEntry> sampler =
571 is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
572 : GetSampler(instr.sampler, info);
573 if (!sampler) {
574 return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
575 }
576
577 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
578 process_mode == TextureProcessMode::LL ||
579 process_mode == TextureProcessMode::LLA;
580 const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture;
581
582 Node bias;
583 Node lod;
584 switch (process_mode) {
585 case TextureProcessMode::None:
586 break;
587 case TextureProcessMode::LZ:
588 lod = Immediate(0.0f);
589 break;
590 case TextureProcessMode::LB:
591 // If present, the LOD or bias is always stored in the register indexed by the gpr20 field,
592 // with an offset depending on the usage of the other registers.
593 bias = GetRegister(instr.gpr20.Value() + bias_offset);
594 break;
595 case TextureProcessMode::LL:
596 lod = GetRegister(instr.gpr20.Value() + bias_offset);
597 break;
598 default:
599 UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
600 break;
601 }
602
603 Node4 values;
604 for (u32 element = 0; element < values.size(); ++element) {
605 MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
606 lod, {}, element, index_var};
607 values[element] = Operation(opcode, meta, coords);
608 }
609
610 return values;
611}
612
613Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
614 TextureProcessMode process_mode, bool depth_compare, bool is_array,
615 bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
616 const bool lod_bias_enabled{
617 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
618
619 const bool is_bindless = bindless_reg.has_value();
620
621 u64 parameter_register = instr.gpr20.Value();
622 if (is_bindless) {
623 ++parameter_register;
624 }
625
626 const u32 bias_lod_offset = (is_bindless ? 1 : 0);
627 if (lod_bias_enabled) {
628 ++parameter_register;
629 }
630
631 const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
632 lod_bias_enabled, 4, 5);
633 const auto coord_count = std::get<0>(coord_counts);
634 // If arrays are enabled, the array index is always stored in the gpr8 field
635 const u64 array_register = instr.gpr8.Value();
636 // The first coordinate is in gpr8, or gpr8 + 1 when arrays are used
637 const u64 coord_register = array_register + (is_array ? 1 : 0);
638
639 std::vector<Node> coords;
640 for (std::size_t i = 0; i < coord_count; ++i) {
641 coords.push_back(GetRegister(coord_register + i));
642 }
643 // For 1D.DC in OpenGL, the second component is ignored.
644 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
645 coords.push_back(Immediate(0.0f));
646 }
647
648 const Node array = is_array ? GetRegister(array_register) : nullptr;
649
650 std::vector<Node> aoffi;
651 if (is_aoffi) {
652 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
653 }
654
655 Node dc;
656 if (depth_compare) {
657 // Depth is always stored in the register indexed by gpr20, or in the next register if LOD
658 // or bias is used
659 dc = GetRegister(parameter_register++);
660 }
661
662 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
663 aoffi, bindless_reg);
664}
665
666Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
667 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
668 const bool lod_bias_enabled =
669 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
670
671 const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
672 lod_bias_enabled, 4, 4);
673 const auto coord_count = std::get<0>(coord_counts);
674
675 // If arrays are enabled, the array index is always stored in the gpr8 field
676 const u64 array_register = instr.gpr8.Value();
677 // The first coordinate is stored in the gpr8 field, or (gpr8 + 1) when arrays are used
678 const u64 coord_register = array_register + (is_array ? 1 : 0);
679 const u64 last_coord_register =
680 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
681 ? static_cast<u64>(instr.gpr20.Value())
682 : coord_register + 1;
683 const u32 bias_offset = coord_count > 2 ? 1 : 0;
684
685 std::vector<Node> coords;
686 for (std::size_t i = 0; i < coord_count; ++i) {
687 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
688 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
689 }
690
691 const Node array = is_array ? GetRegister(array_register) : nullptr;
692
693 Node dc;
694 if (depth_compare) {
695 // Depth is always stored in the register indexed by gpr20, or in the next register if LOD
696 // or bias is used
697 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
698 dc = GetRegister(depth_register);
699 }
700
701 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
702 {});
703}
704
705Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
706 bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
707 ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
708
709 const std::size_t coord_count = GetCoordCount(texture_type);
710
711 // If arrays are enabled, the array index is always stored in the gpr8 field
712 const u64 array_register = instr.gpr8.Value();
713 // The first coordinate is in gpr8, or gpr8 + 1 when arrays are used
714 const u64 coord_register = array_register + (is_array ? 1 : 0);
715
716 std::vector<Node> coords;
717 for (std::size_t i = 0; i < coord_count; ++i) {
718 coords.push_back(GetRegister(coord_register + i));
719 }
720
721 u64 parameter_register = instr.gpr20.Value();
722
723 SamplerInfo info;
724 info.type = texture_type;
725 info.is_array = is_array;
726 info.is_shadow = depth_compare;
727
728 Node index_var;
729 const std::optional<SamplerEntry> sampler =
730 is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
731 : GetSampler(instr.sampler, info);
732 Node4 values;
733 if (!sampler) {
734 for (u32 element = 0; element < values.size(); ++element) {
735 values[element] = Immediate(0);
736 }
737 return values;
738 }
739
740 std::vector<Node> aoffi, ptp;
741 if (is_aoffi) {
742 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
743 } else if (is_ptp) {
744 ptp = GetPtpCoordinates(
745 {GetRegister(parameter_register++), GetRegister(parameter_register++)});
746 }
747
748 Node dc;
749 if (depth_compare) {
750 dc = GetRegister(parameter_register++);
751 }
752
753 const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
754 : Immediate(static_cast<u32>(instr.tld4.component));
755
756 for (u32 element = 0; element < values.size(); ++element) {
757 auto coords_copy = coords;
758 MetaTexture meta{
759 *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
760 index_var};
761 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
762 }
763
764 return values;
765}
766
767Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
768 const auto texture_type{instr.tld.texture_type};
769 const bool is_array{instr.tld.is_array != 0};
770 const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
771 const std::size_t coord_count{GetCoordCount(texture_type)};
772
773 u64 gpr8_cursor{instr.gpr8.Value()};
774 const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
775
776 std::vector<Node> coords;
777 coords.reserve(coord_count);
778 for (std::size_t i = 0; i < coord_count; ++i) {
779 coords.push_back(GetRegister(gpr8_cursor++));
780 }
781
782 u64 gpr20_cursor{instr.gpr20.Value()};
783 // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
784 const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
785 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
786 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
787
788 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
789
790 Node4 values;
791 for (u32 element = 0; element < values.size(); ++element) {
792 auto coords_copy = coords;
793 MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
794 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
795 }
796
797 return values;
798}
799
800Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
801 SamplerInfo info;
802 info.type = texture_type;
803 info.is_array = is_array;
804 info.is_shadow = false;
805 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
806
807 const std::size_t type_coord_count = GetCoordCount(texture_type);
808 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
809 const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
810
811 // If arrays are enabled, the array index is always stored in the gpr8 field
812 const u64 array_register = instr.gpr8.Value();
813 // If this is an array texture, the coordinates start at gpr20 instead
814 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
815
816 const u64 last_coord_register =
817 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
818 ? static_cast<u64>(instr.gpr20.Value())
819 : coord_register + 1;
820
821 std::vector<Node> coords;
822 for (std::size_t i = 0; i < type_coord_count; ++i) {
823 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
824 coords.push_back(
825 GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
826 }
827
828 const Node array = is_array ? GetRegister(array_register) : nullptr;
829 // When LOD is used, it is always stored in gpr20
830 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
831
832 std::vector<Node> aoffi;
833 if (aoffi_enabled) {
834 aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
835 }
836
837 Node4 values;
838 for (u32 element = 0; element < values.size(); ++element) {
839 auto coords_copy = coords;
840 MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
841 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
842 }
843 return values;
844}
845
846std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
847 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
848 std::size_t max_coords, std::size_t max_inputs) {
849 const std::size_t coord_count = GetCoordCount(texture_type);
850
851 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
852 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
853 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
854 UNIMPLEMENTED_MSG("Unsupported Texture operation");
855 total_coord_count = std::min(total_coord_count, max_coords);
856 }
857 // For 1D.DC, OpenGL uses a vec3, but the second component is ignored later.
858 total_coord_count +=
859 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
860
861 return {coord_count, total_coord_count};
862}
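A worked example of the budget: GetTexCode calls this with max_coords = 4 and max_inputs = 5, and a 2D array TEX with depth compare and LOD bias fills it exactly:

```cpp
#include <cstddef>

constexpr std::size_t coord_count = 2;                         // x and y
constexpr std::size_t total_coord_count = coord_count + 1 + 1; // + array index + depth reference
constexpr std::size_t total_reg_count = total_coord_count + 1; // + the lod/bias register
static_assert(total_coord_count <= 4 && total_reg_count <= 5, "fits the TEX budget");
```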
863
864std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
865 bool is_tld4) {
866 const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
867 const u32 size = is_tld4 ? 6 : 4;
868 const s32 wrap_value = is_tld4 ? 32 : 8;
869 const s32 diff_value = is_tld4 ? 64 : 16;
870 const u32 mask = (1U << size) - 1;
871
872 std::vector<Node> aoffi;
873 aoffi.reserve(coord_count);
874
875 const auto aoffi_immediate{
876 TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
877 if (!aoffi_immediate) {
878 // Variable access, not supported on AMD.
879 LOG_WARNING(HW_GPU,
880 "AOFFI constant folding failed, some hardware might have graphical issues");
881 for (std::size_t coord = 0; coord < coord_count; ++coord) {
882 const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
883 const Node condition =
884 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
885 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
886 aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
887 }
888 return aoffi;
889 }
890
891 for (std::size_t coord = 0; coord < coord_count; ++coord) {
892 s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
893 if (value >= wrap_value) {
894 value -= diff_value;
895 }
896 aoffi.push_back(Immediate(value));
897 }
898 return aoffi;
899}
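The encoding is a row of small signed fields in one register: 4-bit fields at bits {0, 4, 8} for regular offsets (range [-8, 7]) and 6-bit fields at bits {0, 8, 16} for TLD4 (range [-32, 31]). The constant-folded path above is equivalent to:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int32_t> DecodeAoffi(uint32_t imm, std::size_t coord_count, bool is_tld4) {
    const uint32_t size = is_tld4 ? 6 : 4;   // bits per field
    const uint32_t stride = is_tld4 ? 8 : 4; // distance between fields
    const int32_t wrap = is_tld4 ? 32 : 8;   // values >= wrap are negative
    const uint32_t mask = (1u << size) - 1;

    std::vector<int32_t> offsets;
    offsets.reserve(coord_count);
    for (std::size_t i = 0; i < coord_count; ++i) {
        int32_t value = static_cast<int32_t>((imm >> (i * stride)) & mask);
        if (value >= wrap) {
            value -= 2 * wrap; // sign-extend the small field
        }
        offsets.push_back(value);
    }
    return offsets;
}
```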
900
901std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
902 static constexpr u32 num_entries = 8;
903
904 std::vector<Node> ptp;
905 ptp.reserve(num_entries);
906
907 const auto global_size = static_cast<s64>(global_code.size());
908 const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
909 const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
910 if (!low || !high) {
911 for (u32 entry = 0; entry < num_entries; ++entry) {
912 const u32 reg = entry / 4;
913 const u32 offset = entry % 4;
914 const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
915 const Node condition =
916 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
917 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
918 ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
919 }
920 return ptp;
921 }
922
923 const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
924 for (u32 entry = 0; entry < num_entries; ++entry) {
925 s32 value = (immediate >> (entry * 8)) & 0b111111;
926 if (value >= 32) {
927 value -= 64;
928 }
929 ptp.push_back(Immediate(value));
930 }
931
932 return ptp;
933}
934
935} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
deleted file mode 100644
index 1c0957277..000000000
--- a/src/video_core/shader/decode/video.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using std::move;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::VideoType;
18using Tegra::Shader::VmadShr;
19using Tegra::Shader::VmnmxOperation;
20using Tegra::Shader::VmnmxType;
21
22u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
23 const Instruction instr = {program_code[pc]};
24 const auto opcode = OpCode::Decode(instr);
25
26 if (opcode->get().GetId() == OpCode::Id::VMNMX) {
27 DecodeVMNMX(bb, instr);
28 return pc;
29 }
30
31 const Node op_a =
32 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
33 instr.video.type_a, instr.video.byte_height_a);
34 const Node op_b = [this, instr] {
35 if (instr.video.use_register_b) {
36 return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
37 instr.video.signed_b, instr.video.type_b,
38 instr.video.byte_height_b);
39 }
40 if (instr.video.signed_b) {
41 const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
42 return Immediate(static_cast<u32>(imm));
43 } else {
44 return Immediate(instr.alu.GetImm20_16());
45 }
46 }();
47
48 switch (opcode->get().GetId()) {
49 case OpCode::Id::VMAD: {
50 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
51 const Node op_c = GetRegister(instr.gpr39);
52
53 Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
54 value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);
55
56 if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
57 const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
58 value =
59 SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
60 }
61
62 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
63 SetRegister(bb, instr.gpr0, value);
64 break;
65 }
66 case OpCode::Id::VSETP: {
67 // We can't use the constant predicate as destination.
68 ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
69
70 const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
71 const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
72 const Node second_pred = GetPredicate(instr.vsetp.pred39, false);
73
74 const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);
75
76 // Set the primary predicate to the result of Predicate OP SecondPredicate
77 SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));
78
79 if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
80 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
81 // if enabled
82 const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
83 SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
84 }
85 break;
86 }
87 default:
88 UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
89 }
90
91 return pc;
92}
93
94Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
95 u64 byte_height) {
96 if (!is_chunk) {
97 return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
98 }
99
100 switch (type) {
101 case VideoType::Size16_Low:
102 return BitfieldExtract(op, 0, 16);
103 case VideoType::Size16_High:
104 return BitfieldExtract(op, 16, 16);
105 case VideoType::Size32:
106 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
107 // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
108 UNIMPLEMENTED();
109 return Immediate(0);
110 case VideoType::Invalid:
111 UNREACHABLE_MSG("Invalid instruction encoding");
112 return Immediate(0);
113 default:
114 UNREACHABLE();
115 return Immediate(0);
116 }
117}
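The selector either extracts one byte (byte_height picks which) or one 16-bit half of the source register. As plain bit twiddling (unsigned case only; sign handling is applied later via SignedOperation):

```cpp
#include <cstdint>

uint32_t VideoOperand(uint32_t reg, bool is_chunk, bool high_half, uint32_t byte_height) {
    if (!is_chunk) {
        return (reg >> (byte_height * 8)) & 0xff; // one of bytes 0-3
    }
    return high_half ? (reg >> 16) & 0xffff : reg & 0xffff; // Size16_High / Size16_Low
}
```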
118
119void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
120 UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
121 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
122 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
123 UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
124 UNIMPLEMENTED_IF(instr.vmnmx.sat);
125 UNIMPLEMENTED_IF(instr.generates_cc);
126
127 Node op_a = GetRegister(instr.gpr8);
128 Node op_b = GetRegister(instr.gpr20);
129 Node op_c = GetRegister(instr.gpr39);
130
131 const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
132 const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
133
134 const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
135 Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
136
137 switch (instr.vmnmx.operation) {
138 case VmnmxOperation::Mrg_16H:
139 value = BitfieldInsert(move(op_c), move(value), 16, 16);
140 break;
141 case VmnmxOperation::Mrg_16L:
142 value = BitfieldInsert(move(op_c), move(value), 0, 16);
143 break;
144 case VmnmxOperation::Mrg_8B0:
145 value = BitfieldInsert(move(op_c), move(value), 0, 8);
146 break;
147 case VmnmxOperation::Mrg_8B2:
148 value = BitfieldInsert(move(op_c), move(value), 16, 8);
149 break;
150 case VmnmxOperation::Acc:
151 value = Operation(OperationCode::IAdd, move(value), move(op_c));
152 break;
153 case VmnmxOperation::Min:
154 value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
155 break;
156 case VmnmxOperation::Max:
157 value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
158 break;
159 case VmnmxOperation::Nop:
160 break;
161 default:
162 UNREACHABLE();
163 break;
164 }
165
166 SetRegister(bb, instr.gpr0, std::move(value));
167}
168
169} // namespace VideoCommon::Shader
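
Note: the video instructions in this file (VSETP/VMNMX) first narrow each 32-bit source register to a byte or half-word lane, which is what GetVideoOperand expresses through BitfieldExtract. A minimal host-side sketch of that selection for the unsigned case (the function name and scalar types below are illustrative, not part of the codebase):

#include <cstdint>

// Illustrative sketch of GetVideoOperand's lane selection (unsigned case).
// byte_height picks one of the four bytes when the operand is not a
// 16-bit "chunk"; chunks select the low or high half-word instead.
uint32_t ExtractVideoLane(uint32_t reg, bool is_chunk, bool high_half, uint32_t byte_height) {
    if (!is_chunk) {
        return (reg >> (byte_height * 8)) & 0xFF; // Size8 lane
    }
    return high_half ? (reg >> 16) : (reg & 0xFFFF); // Size16_High / Size16_Low
}
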
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
deleted file mode 100644
index 37433d783..000000000
--- a/src/video_core/shader/decode/warp.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::ShuffleOperation;
17using Tegra::Shader::VoteOperation;
18
19namespace {
20
21OperationCode GetOperationCode(VoteOperation vote_op) {
22 switch (vote_op) {
23 case VoteOperation::All:
24 return OperationCode::VoteAll;
25 case VoteOperation::Any:
26 return OperationCode::VoteAny;
27 case VoteOperation::Eq:
28 return OperationCode::VoteEqual;
29 default:
30 UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
31 return OperationCode::VoteAll;
32 }
33}
34
35} // Anonymous namespace
36
37u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
38 const Instruction instr = {program_code[pc]};
39 const auto opcode = OpCode::Decode(instr);
40
41 // Signal the backend that this shader uses warp instructions.
42 uses_warps = true;
43
44 switch (opcode->get().GetId()) {
45 case OpCode::Id::VOTE: {
46 const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
47 const Node active = Operation(OperationCode::BallotThread, value);
48 const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
49 SetRegister(bb, instr.gpr0, active);
50 SetPredicate(bb, instr.vote.dest_pred, vote);
51 break;
52 }
53 case OpCode::Id::SHFL: {
54 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
55 : GetRegister(instr.gpr39);
56 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
57 : GetRegister(instr.gpr20);
58
59 Node thread_id = Operation(OperationCode::ThreadId);
60 Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
61 Node seg_mask = BitfieldExtract(mask, 8, 16);
62
63 Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
64 Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
65 Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
66 Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
67
68 Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
69 switch (instr.shfl.operation) {
70 case ShuffleOperation::Idx:
71 return Operation(OperationCode::IBitwiseOr,
72 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
73 min_thread_id);
74 case ShuffleOperation::Down:
75 return Operation(OperationCode::IAdd, thread_id, index);
76 case ShuffleOperation::Up:
77 return Operation(OperationCode::IAdd, thread_id,
78 Operation(OperationCode::INegate, index));
79 case ShuffleOperation::Bfly:
80 return Operation(OperationCode::IBitwiseXor, thread_id, index);
81 }
82 UNREACHABLE();
83 return Immediate(0U);
84 }();
85
86 Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
87 if (instr.shfl.operation == ShuffleOperation::Up) {
88 return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
89 } else {
90 return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
91 }
92 }();
93
94 SetPredicate(bb, instr.shfl.pred48, in_bounds);
95 SetRegister(
96 bb, instr.gpr0,
97 Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
98 break;
99 }
100 case OpCode::Id::FSWZADD: {
101 UNIMPLEMENTED_IF(instr.fswzadd.ndv);
102
103 Node op_a = GetRegister(instr.gpr8);
104 Node op_b = GetRegister(instr.gpr20);
105 Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
106 SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
107 break;
108 }
109 default:
110 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
111 break;
112 }
113
114 return pc;
115}
116
117} // namespace VideoCommon::Shader
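
Note: the SHFL lowering above follows the warp-shuffle lane math PTX documents: the packed mask supplies a lane clamp and a segment mask, which bound the source lane. A hedged per-thread restatement (names are illustrative; the Up bound uses a signed compare, matching LogicalIGreaterEqual above):

#include <cstdint>
#include <utility>

enum class Shuffle { Idx, Up, Down, Bfly };

// Illustrative: returns the source lane and whether it is in bounds.
std::pair<uint32_t, bool> ShuffleLane(Shuffle op, uint32_t tid, uint32_t index, uint32_t mask) {
    const uint32_t clamp = mask & 0x1F;             // lane clamp
    const uint32_t seg_mask = (mask >> 8) & 0xFFFF; // segment mask
    const uint32_t min_tid = tid & seg_mask;
    const uint32_t max_tid = min_tid | (clamp & ~seg_mask);
    uint32_t src = 0;
    switch (op) {
    case Shuffle::Idx:  src = (index & ~seg_mask) | min_tid; break;
    case Shuffle::Down: src = tid + index; break;
    case Shuffle::Up:   src = tid - index; break;
    case Shuffle::Bfly: src = tid ^ index; break;
    }
    // Up compares signed, so a wrapped (negative) lane is out of bounds.
    const bool in_bounds = op == Shuffle::Up
                               ? static_cast<int32_t>(src) >= static_cast<int32_t>(min_tid)
                               : src <= max_tid;
    return {src, in_bounds};
}
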
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
deleted file mode 100644
index 233b8fa42..000000000
--- a/src/video_core/shader/decode/xmad.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::PredCondition;
16
17u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 UNIMPLEMENTED_IF(instr.xmad.sign_a);
22 UNIMPLEMENTED_IF(instr.xmad.sign_b);
23 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
24 "Condition codes generation in XMAD is not implemented");
25
26 Node op_a = GetRegister(instr.gpr8);
27
28 // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
29 UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
30 const bool is_signed_a = instr.xmad.sign_a == 1;
31 const bool is_signed_b = instr.xmad.sign_b == 1;
32 const bool is_signed_c = is_signed_a;
33
34 auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
35 op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
36 switch (opcode->get().GetId()) {
37 case OpCode::Id::XMAD_CR:
38 return {instr.xmad.merge_56,
39 instr.xmad.product_shift_left_second,
40 instr.xmad.high_b,
41 instr.xmad.mode_cbf,
42 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
43 GetRegister(instr.gpr39)};
44 case OpCode::Id::XMAD_RR:
45 return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
46 instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
47 case OpCode::Id::XMAD_RC:
48 return {false,
49 false,
50 instr.xmad.high_b,
51 instr.xmad.mode_cbf,
52 GetRegister(instr.gpr39),
53 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
54 case OpCode::Id::XMAD_IMM:
55 return {instr.xmad.merge_37,
56 instr.xmad.product_shift_left,
57 false,
58 instr.xmad.mode,
59 Immediate(static_cast<u32>(instr.xmad.imm20_16)),
60 GetRegister(instr.gpr39)};
61 default:
62 UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
63 return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
64 }
65 }();
66
67 op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
68 instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
69
70 const Node original_b = op_b_binding;
71 const Node op_b =
72 SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
73 is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
74
75 // sign_a and sign_b were already checked for equality above, so either one can be used here.
76 Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
77 if (is_psl) {
78 product =
79 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
80 }
81 SetTemporary(bb, 0, product);
82 product = GetTemporary(0);
83
84 Node original_c = op_c;
85 const Tegra::Shader::XmadMode set_mode = mode; // Workaround for a clang compilation error
86 op_c = [&] {
87 switch (set_mode) {
88 case Tegra::Shader::XmadMode::None:
89 return original_c;
90 case Tegra::Shader::XmadMode::CLo:
91 return BitfieldExtract(std::move(original_c), 0, 16);
92 case Tegra::Shader::XmadMode::CHi:
93 return BitfieldExtract(std::move(original_c), 16, 16);
94 case Tegra::Shader::XmadMode::CBcc: {
95 Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
96 original_b, Immediate(16));
97 return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
98 std::move(shifted_b));
99 }
100 case Tegra::Shader::XmadMode::CSfu: {
101 const Node comp_a =
102 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
103 const Node comp_b =
104 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
105 const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
106
107 const Node comp_minus_a = GetPredicateComparisonInteger(
108 PredCondition::NE, is_signed_a,
109 SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
110 Immediate(0x80000000)),
111 Immediate(0));
112 const Node comp_minus_b = GetPredicateComparisonInteger(
113 PredCondition::NE, is_signed_b,
114 SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
115 Immediate(0x80000000)),
116 Immediate(0));
117
118 Node new_c = Operation(
119 OperationCode::Select, comp_minus_a,
120 SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
121 original_c);
122 new_c = Operation(
123 OperationCode::Select, comp_minus_b,
124 SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
125 std::move(new_c));
126
127 return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
128 }
129 default:
130 UNREACHABLE();
131 return Immediate(0);
132 }
133 }();
134
135 SetTemporary(bb, 1, op_c);
136 op_c = GetTemporary(1);
137
138 // TODO(Rodrigo): Use an appropriate sign for this operation
139 Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
140 SetTemporary(bb, 2, sum);
141 sum = GetTemporary(2);
142 if (is_merge) {
143 const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
144 Immediate(0), Immediate(16));
145 const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
146 Immediate(16));
147 sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
148 }
149
150 SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
151 SetRegister(bb, instr.gpr0, std::move(sum));
152
153 return pc;
154}
155
156} // namespace VideoCommon::Shader
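
Note: at its core XMAD multiplies two 16-bit register halves and accumulates a 32-bit third operand; the modes in the switch above only rewrite op_c before the add. A hedged scalar reference for the simplest configuration (unsigned operands, XmadMode::None, no half-merge; the helper below is illustrative):

#include <cstdint>

// Illustrative reference for the XMAD data path above.
uint32_t XmadNone(uint32_t a, bool high_a, uint32_t b, bool high_b, uint32_t c, bool psl) {
    const uint32_t a16 = high_a ? (a >> 16) : (a & 0xFFFF);
    const uint32_t b16 = high_b ? (b >> 16) : (b & 0xFFFF);
    uint32_t product = a16 * b16;
    if (psl) {
        product <<= 16; // product_shift_left variant
    }
    return product + c;
}
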
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp
deleted file mode 100644
index 2647865d4..000000000
--- a/src/video_core/shader/expr.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <variant>
7
8#include "video_core/shader/expr.h"
9
10namespace VideoCommon::Shader {
11namespace {
12bool ExprIsBoolean(const Expr& expr) {
13 return std::holds_alternative<ExprBoolean>(*expr);
14}
15
16bool ExprBooleanGet(const Expr& expr) {
17 return std::get_if<ExprBoolean>(expr.get())->value; // precondition: expr holds ExprBoolean
18}
19} // Anonymous namespace
20
21bool ExprAnd::operator==(const ExprAnd& b) const {
22 return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
23}
24
25bool ExprAnd::operator!=(const ExprAnd& b) const {
26 return !operator==(b);
27}
28
29bool ExprOr::operator==(const ExprOr& b) const {
30 return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
31}
32
33bool ExprOr::operator!=(const ExprOr& b) const {
34 return !operator==(b);
35}
36
37bool ExprNot::operator==(const ExprNot& b) const {
38 return *operand1 == *b.operand1;
39}
40
41bool ExprNot::operator!=(const ExprNot& b) const {
42 return !operator==(b);
43}
44
45Expr MakeExprNot(Expr first) {
46 if (std::holds_alternative<ExprNot>(*first)) {
47 return std::get_if<ExprNot>(first.get())->operand1;
48 }
49 return MakeExpr<ExprNot>(std::move(first));
50}
51
52Expr MakeExprAnd(Expr first, Expr second) {
53 if (ExprIsBoolean(first)) {
54 return ExprBooleanGet(first) ? second : first;
55 }
56 if (ExprIsBoolean(second)) {
57 return ExprBooleanGet(second) ? first : second;
58 }
59 return MakeExpr<ExprAnd>(std::move(first), std::move(second));
60}
61
62Expr MakeExprOr(Expr first, Expr second) {
63 if (ExprIsBoolean(first)) {
64 return ExprBooleanGet(first) ? first : second;
65 }
66 if (ExprIsBoolean(second)) {
67 return ExprBooleanGet(second) ? second : first;
68 }
69 return MakeExpr<ExprOr>(std::move(first), std::move(second));
70}
71
72bool ExprAreEqual(const Expr& first, const Expr& second) {
73 return (*first) == (*second);
74}
75
76bool ExprAreOpposite(const Expr& first, const Expr& second) {
77 if (std::holds_alternative<ExprNot>(*first)) {
78 return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second);
79 }
80 if (std::holds_alternative<ExprNot>(*second)) {
81 return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first);
82 }
83 return false;
84}
85
86bool ExprIsTrue(const Expr& first) {
87 if (ExprIsBoolean(first)) {
88 return ExprBooleanGet(first);
89 }
90 return false;
91}
92
93} // namespace VideoCommon::Shader
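
Note: the Make* helpers above fold boolean literals eagerly, so control-flow analysis never builds trees like "true && p". A short usage sketch, assuming the declarations from expr.h (FoldingExample is illustrative):

#include <cassert>

void FoldingExample() {
    const Expr p = MakeExpr<ExprPredicate>(3U);
    const Expr t = MakeExpr<ExprBoolean>(true);
    const Expr f = MakeExpr<ExprBoolean>(false);

    assert(ExprAreEqual(MakeExprAnd(t, p), p)); // true && p  -> p
    assert(ExprIsTrue(MakeExprOr(t, p)));       // true || p  -> true
    assert(!ExprIsTrue(MakeExprAnd(f, p)));     // false && p -> false
    assert(ExprAreOpposite(p, MakeExprNot(p))); // p vs !p
}
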
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
deleted file mode 100644
index cda284c72..000000000
--- a/src/video_core/shader/expr.h
+++ /dev/null
@@ -1,156 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <variant>
9
10#include "video_core/engines/shader_bytecode.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::ConditionCode;
15using Tegra::Shader::Pred;
16
17class ExprAnd;
18class ExprBoolean;
19class ExprCondCode;
20class ExprGprEqual;
21class ExprNot;
22class ExprOr;
23class ExprPredicate;
24class ExprVar;
25
26using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
27 ExprBoolean, ExprGprEqual>;
28using Expr = std::shared_ptr<ExprData>;
29
30class ExprAnd final {
31public:
32 explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
33
34 bool operator==(const ExprAnd& b) const;
35 bool operator!=(const ExprAnd& b) const;
36
37 Expr operand1;
38 Expr operand2;
39};
40
41class ExprOr final {
42public:
43 explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
44
45 bool operator==(const ExprOr& b) const;
46 bool operator!=(const ExprOr& b) const;
47
48 Expr operand1;
49 Expr operand2;
50};
51
52class ExprNot final {
53public:
54 explicit ExprNot(Expr a) : operand1{std::move(a)} {}
55
56 bool operator==(const ExprNot& b) const;
57 bool operator!=(const ExprNot& b) const;
58
59 Expr operand1;
60};
61
62class ExprVar final {
63public:
64 explicit ExprVar(u32 index) : var_index{index} {}
65
66 bool operator==(const ExprVar& b) const {
67 return var_index == b.var_index;
68 }
69
70 bool operator!=(const ExprVar& b) const {
71 return !operator==(b);
72 }
73
74 u32 var_index;
75};
76
77class ExprPredicate final {
78public:
79 explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
80
81 bool operator==(const ExprPredicate& b) const {
82 return predicate == b.predicate;
83 }
84
85 bool operator!=(const ExprPredicate& b) const {
86 return !operator==(b);
87 }
88
89 u32 predicate;
90};
91
92class ExprCondCode final {
93public:
94 explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
95
96 bool operator==(const ExprCondCode& b) const {
97 return cc == b.cc;
98 }
99
100 bool operator!=(const ExprCondCode& b) const {
101 return !operator==(b);
102 }
103
104 ConditionCode cc;
105};
106
107class ExprBoolean final {
108public:
109 explicit ExprBoolean(bool val) : value{val} {}
110
111 bool operator==(const ExprBoolean& b) const {
112 return value == b.value;
113 }
114
115 bool operator!=(const ExprBoolean& b) const {
116 return !operator==(b);
117 }
118
119 bool value;
120};
121
122class ExprGprEqual final {
123public:
124 explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
125
126 bool operator==(const ExprGprEqual& b) const {
127 return gpr == b.gpr && value == b.value;
128 }
129
130 bool operator!=(const ExprGprEqual& b) const {
131 return !operator==(b);
132 }
133
134 u32 gpr;
135 u32 value;
136};
137
138template <typename T, typename... Args>
139Expr MakeExpr(Args&&... args) {
140 static_assert(std::is_convertible_v<T, ExprData>);
141 return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
142}
143
144bool ExprAreEqual(const Expr& first, const Expr& second);
145
146bool ExprAreOpposite(const Expr& first, const Expr& second);
147
148Expr MakeExprNot(Expr first);
149
150Expr MakeExprAnd(Expr first, Expr second);
151
152Expr MakeExprOr(Expr first, Expr second);
153
154bool ExprIsTrue(const Expr& first);
155
156} // namespace VideoCommon::Shader
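
Note: because Expr is a shared_ptr to a std::variant, ExprAreEqual dereferences and compares structurally rather than by pointer identity, so two independently built trees of the same shape compare equal. A small illustrative composition (the register and variable indices are made up):

// Builds the condition "v0 && !(gpr8 == 0)" as a structurally
// comparable tree.
Expr BuildExampleCondition() {
    return MakeExprAnd(MakeExpr<ExprVar>(0U),
                       MakeExprNot(MakeExpr<ExprGprEqual>(8U, 0U)));
}
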
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
deleted file mode 100644
index e18ccba8e..000000000
--- a/src/video_core/shader/memory_util.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7
8#include <boost/container_hash/hash.hpp>
9
10#include "common/common_types.h"
11#include "core/core.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/memory_manager.h"
14#include "video_core/shader/memory_util.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace VideoCommon::Shader {
18
19GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
20 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
21 const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
22 return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
23}
24
25bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
26 // Sched instructions appear once every 4 instructions.
27 constexpr std::size_t SchedPeriod = 4;
28 const std::size_t absolute_offset = offset - main_offset;
29 return (absolute_offset % SchedPeriod) == 0;
30}
31
32std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
33 // This is the encoded version of BRA that jumps to itself. All Nvidia
34 // shaders end with one.
35 static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
36 static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
37
38 const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
39 std::size_t offset = start_offset;
40 while (offset < program.size()) {
41 const u64 instruction = program[offset];
42 if (!IsSchedInstruction(offset, start_offset)) {
43 if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
44 // End on Maxwell's "nop" instruction
45 break;
46 }
47 if (instruction == 0) {
48 break;
49 }
50 }
51 ++offset;
52 }
53 // The last instruction is included in the program size
54 return std::min(offset + 1, program.size());
55}
56
57ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
58 const u8* host_ptr, bool is_compute) {
59 ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
60 ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });
61 memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
62 code.resize(CalculateProgramSize(code, is_compute));
63 return code;
64}
65
66u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
67 const ProgramCode& code_b) {
68 size_t unique_identifier = boost::hash_value(code);
69 if (is_a) {
70 // A VertexA program is always paired with a VertexB program, so hash both streams
71 boost::hash_combine(unique_identifier, boost::hash_value(code_b));
72 }
73 return static_cast<u64>(unique_identifier);
74}
75
76} // namespace VideoCommon::Shader
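
Note: a compact restatement of the scan in CalculateProgramSize, with the sched-word rule spelled out (the helper below is illustrative, not part of the codebase):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative: a Maxwell program ends at the self-jumping BRA (or a
// zero word). Every fourth slot from the start is a sched word and is
// never treated as a terminator.
std::size_t FindProgramEnd(const std::vector<uint64_t>& code, std::size_t start) {
    constexpr uint64_t kSelfBranch = 0xE2400FFFFF07000FULL;
    constexpr uint64_t kMask = 0xFFFFFFFFFF7FFFFFULL;
    std::size_t offset = start;
    while (offset < code.size()) {
        const bool is_sched = (offset - start) % 4 == 0;
        const uint64_t inst = code[offset];
        if (!is_sched && ((inst & kMask) == kSelfBranch || inst == 0)) {
            break;
        }
        ++offset;
    }
    return std::min(offset + 1, code.size()); // the terminator counts toward the size
}
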
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
deleted file mode 100644
index 4624d38e6..000000000
--- a/src/video_core/shader/memory_util.h
+++ /dev/null
@@ -1,43 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13
14namespace Tegra {
15class MemoryManager;
16}
17
18namespace VideoCommon::Shader {
19
20using ProgramCode = std::vector<u64>;
21
22constexpr u32 STAGE_MAIN_OFFSET = 10;
23constexpr u32 KERNEL_MAIN_OFFSET = 0;
24
25/// Gets the address for the specified shader stage program
26GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
27 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
28
29 /// Returns whether the instruction at the given offset is a scheduler (sched) instruction
30bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
31
32/// Calculates the size of a program stream
33std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
34
35/// Gets the shader program code from memory for the specified address
36ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
37 const u8* host_ptr, bool is_compute);
38
39/// Hashes one (or two) program streams
40u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
41 const ProgramCode& code_b = {});
42
43} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
deleted file mode 100644
index b54d33763..000000000
--- a/src/video_core/shader/node.h
+++ /dev/null
@@ -1,701 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <optional>
11#include <string>
12#include <tuple>
13#include <utility>
14#include <variant>
15#include <vector>
16
17#include "common/common_types.h"
18#include "video_core/engines/shader_bytecode.h"
19
20namespace VideoCommon::Shader {
21
22enum class OperationCode {
23 Assign, /// (float& dest, float src) -> void
24
25 Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
26
27 FAdd, /// (MetaArithmetic, float a, float b) -> float
28 FMul, /// (MetaArithmetic, float a, float b) -> float
29 FDiv, /// (MetaArithmetic, float a, float b) -> float
30 FFma, /// (MetaArithmetic, float a, float b, float c) -> float
31 FNegate, /// (MetaArithmetic, float a) -> float
32 FAbsolute, /// (MetaArithmetic, float a) -> float
33 FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
34 FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
35 FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
36 FMin, /// (MetaArithmetic, float a, float b) -> float
37 FMax, /// (MetaArithmetic, float a, float b) -> float
38 FCos, /// (MetaArithmetic, float a) -> float
39 FSin, /// (MetaArithmetic, float a) -> float
40 FExp2, /// (MetaArithmetic, float a) -> float
41 FLog2, /// (MetaArithmetic, float a) -> float
42 FInverseSqrt, /// (MetaArithmetic, float a) -> float
43 FSqrt, /// (MetaArithmetic, float a) -> float
44 FRoundEven, /// (MetaArithmetic, float a) -> float
45 FFloor, /// (MetaArithmetic, float a) -> float
46 FCeil, /// (MetaArithmetic, float a) -> float
47 FTrunc, /// (MetaArithmetic, float a) -> float
48 FCastInteger, /// (MetaArithmetic, int a) -> float
49 FCastUInteger, /// (MetaArithmetic, uint a) -> float
50 FSwizzleAdd, /// (float a, float b, uint mask) -> float
51
52 IAdd, /// (MetaArithmetic, int a, int b) -> int
53 IMul, /// (MetaArithmetic, int a, int b) -> int
54 IDiv, /// (MetaArithmetic, int a, int b) -> int
55 INegate, /// (MetaArithmetic, int a) -> int
56 IAbsolute, /// (MetaArithmetic, int a) -> int
57 IMin, /// (MetaArithmetic, int a, int b) -> int
58 IMax, /// (MetaArithmetic, int a, int b) -> int
59 ICastFloat, /// (MetaArithmetic, float a) -> int
60 ICastUnsigned, /// (MetaArithmetic, uint a) -> int
61 ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int
62 ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int
63 IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
64 IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int
65 IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int
66 IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int
67 IBitwiseNot, /// (MetaArithmetic, int a) -> int
68 IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
69 IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int bits) -> int
70 IBitCount, /// (MetaArithmetic, int) -> int
71 IBitMSB, /// (MetaArithmetic, int) -> int
72
73 UAdd, /// (MetaArithmetic, uint a, uint b) -> uint
74 UMul, /// (MetaArithmetic, uint a, uint b) -> uint
75 UDiv, /// (MetaArithmetic, uint a, uint b) -> uint
76 UMin, /// (MetaArithmetic, uint a, uint b) -> uint
77 UMax, /// (MetaArithmetic, uint a, uint b) -> uint
78 UCastFloat, /// (MetaArithmetic, float a) -> uint
79 UCastSigned, /// (MetaArithmetic, int a) -> uint
80 ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint
81 ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
82 UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
83 UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint
84 UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint
85 UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint
86 UBitwiseNot, /// (MetaArithmetic, uint a) -> uint
87 UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
88 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint
89 UBitCount, /// (MetaArithmetic, uint) -> uint
90 UBitMSB, /// (MetaArithmetic, uint) -> uint
91
92 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
93 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
94 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
95 HAbsolute, /// (f16vec2 a) -> f16vec2
96 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
97 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
98 HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
99 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
100 HMergeF32, /// (f16vec2 src) -> float
101 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
102 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
103 HPack2, /// (float a, float b) -> f16vec2
104
105 LogicalAssign, /// (bool& dst, bool src) -> void
106 LogicalAnd, /// (bool a, bool b) -> bool
107 LogicalOr, /// (bool a, bool b) -> bool
108 LogicalXor, /// (bool a, bool b) -> bool
109 LogicalNegate, /// (bool a) -> bool
110 LogicalPick2, /// (bool2 pair, uint index) -> bool
111 LogicalAnd2, /// (bool2 a) -> bool
112
113 LogicalFOrdLessThan, /// (float a, float b) -> bool
114 LogicalFOrdEqual, /// (float a, float b) -> bool
115 LogicalFOrdLessEqual, /// (float a, float b) -> bool
116 LogicalFOrdGreaterThan, /// (float a, float b) -> bool
117 LogicalFOrdNotEqual, /// (float a, float b) -> bool
118 LogicalFOrdGreaterEqual, /// (float a, float b) -> bool
119 LogicalFOrdered, /// (float a, float b) -> bool
120 LogicalFUnordered, /// (float a, float b) -> bool
121 LogicalFUnordLessThan, /// (float a, float b) -> bool
122 LogicalFUnordEqual, /// (float a, float b) -> bool
123 LogicalFUnordLessEqual, /// (float a, float b) -> bool
124 LogicalFUnordGreaterThan, /// (float a, float b) -> bool
125 LogicalFUnordNotEqual, /// (float a, float b) -> bool
126 LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
127
128 LogicalILessThan, /// (int a, int b) -> bool
129 LogicalIEqual, /// (int a, int b) -> bool
130 LogicalILessEqual, /// (int a, int b) -> bool
131 LogicalIGreaterThan, /// (int a, int b) -> bool
132 LogicalINotEqual, /// (int a, int b) -> bool
133 LogicalIGreaterEqual, /// (int a, int b) -> bool
134
135 LogicalULessThan, /// (uint a, uint b) -> bool
136 LogicalUEqual, /// (uint a, uint b) -> bool
137 LogicalULessEqual, /// (uint a, uint b) -> bool
138 LogicalUGreaterThan, /// (uint a, uint b) -> bool
139 LogicalUNotEqual, /// (uint a, uint b) -> bool
140 LogicalUGreaterEqual, /// (uint a, uint b) -> bool
141
142 LogicalAddCarry, /// (uint a, uint b) -> bool
143
144 Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
145 Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
146 Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
147 Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
148 Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
149 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
150 Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
151 Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
152 Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
153 Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
154 Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
155 Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
156
157 Texture, /// (MetaTexture, float[N] coords) -> float4
158 TextureLod, /// (MetaTexture, float[N] coords) -> float4
159 TextureGather, /// (MetaTexture, float[N] coords) -> float4
160 TextureQueryDimensions, /// (MetaTexture, float a) -> float4
161 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
162 TexelFetch, /// (MetaTexture, int[N], int) -> float4
163 TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
164
165 ImageLoad, /// (MetaImage, int[N] coords) -> void
166 ImageStore, /// (MetaImage, int[N] coords) -> void
167
168 AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
169 AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
170 AtomicImageOr, /// (MetaImage, int[N] coords) -> void
171 AtomicImageXor, /// (MetaImage, int[N] coords) -> void
172 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
173
174 AtomicUExchange, /// (memory, uint) -> uint
175 AtomicUAdd, /// (memory, uint) -> uint
176 AtomicUMin, /// (memory, uint) -> uint
177 AtomicUMax, /// (memory, uint) -> uint
178 AtomicUAnd, /// (memory, uint) -> uint
179 AtomicUOr, /// (memory, uint) -> uint
180 AtomicUXor, /// (memory, uint) -> uint
181
182 AtomicIExchange, /// (memory, int) -> int
183 AtomicIAdd, /// (memory, int) -> int
184 AtomicIMin, /// (memory, int) -> int
185 AtomicIMax, /// (memory, int) -> int
186 AtomicIAnd, /// (memory, int) -> int
187 AtomicIOr, /// (memory, int) -> int
188 AtomicIXor, /// (memory, int) -> int
189
190 ReduceUAdd, /// (memory, uint) -> void
191 ReduceUMin, /// (memory, uint) -> void
192 ReduceUMax, /// (memory, uint) -> void
193 ReduceUAnd, /// (memory, uint) -> void
194 ReduceUOr, /// (memory, uint) -> void
195 ReduceUXor, /// (memory, uint) -> void
196
197 ReduceIAdd, /// (memory, int) -> void
198 ReduceIMin, /// (memory, int) -> void
199 ReduceIMax, /// (memory, int) -> void
200 ReduceIAnd, /// (memory, int) -> void
201 ReduceIOr, /// (memory, int) -> void
202 ReduceIXor, /// (memory, int) -> void
203
204 Branch, /// (uint branch_target) -> void
205 BranchIndirect, /// (uint branch_target) -> void
206 PushFlowStack, /// (uint branch_target) -> void
207 PopFlowStack, /// () -> void
208 Exit, /// () -> void
209 Discard, /// () -> void
210
211 EmitVertex, /// () -> void
212 EndPrimitive, /// () -> void
213
214 InvocationId, /// () -> int
215 YNegate, /// () -> float
216 LocalInvocationIdX, /// () -> uint
217 LocalInvocationIdY, /// () -> uint
218 LocalInvocationIdZ, /// () -> uint
219 WorkGroupIdX, /// () -> uint
220 WorkGroupIdY, /// () -> uint
221 WorkGroupIdZ, /// () -> uint
222
223 BallotThread, /// (bool) -> uint
224 VoteAll, /// (bool) -> bool
225 VoteAny, /// (bool) -> bool
226 VoteEqual, /// (bool) -> bool
227
228 ThreadId, /// () -> uint
229 ThreadEqMask, /// () -> uint
230 ThreadGeMask, /// () -> uint
231 ThreadGtMask, /// () -> uint
232 ThreadLeMask, /// () -> uint
233 ThreadLtMask, /// () -> uint
234 ShuffleIndexed, /// (uint value, uint index) -> uint
235
236 Barrier, /// () -> void
237 MemoryBarrierGroup, /// () -> void
238 MemoryBarrierGlobal, /// () -> void
239
240 Amount,
241};
242
243enum class InternalFlag {
244 Zero = 0,
245 Sign = 1,
246 Carry = 2,
247 Overflow = 3,
248 Amount = 4,
249};
250
251enum class MetaStackClass {
252 Ssy,
253 Pbk,
254};
255
256class OperationNode;
257class ConditionalNode;
258class GprNode;
259class CustomVarNode;
260class ImmediateNode;
261class InternalFlagNode;
262class PredicateNode;
263class AbufNode;
264class CbufNode;
265class LmemNode;
266class PatchNode;
267class SmemNode;
268class GmemNode;
269class CommentNode;
270
271using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
272 InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
273 LmemNode, SmemNode, GmemNode, CommentNode>;
274using Node = std::shared_ptr<NodeData>;
275using Node4 = std::array<Node, 4>;
276using NodeBlock = std::vector<Node>;
277
278struct ArraySamplerNode;
279struct BindlessSamplerNode;
280struct SeparateSamplerNode;
281
282using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
283using TrackSampler = std::shared_ptr<TrackSamplerData>;
284
285struct SamplerEntry {
286 /// Bound samplers constructor
287 explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
288 bool is_shadow_, bool is_buffer_, bool is_indexed_)
289 : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
290 is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
291
292 /// Separate sampler constructor
293 explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
294 Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
295 bool is_buffer_)
296 : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
297 buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
298 is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
299
300 /// Bindless samplers constructor
301 explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
302 bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
303 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
304 is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
305 }
306
307 u32 index = 0; ///< Emulated index given to this sampler.
308 u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
309 u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
310 u32 buffer = 0; ///< Buffer where the bindless sampler is read.
311 u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
312 u32 size = 1; ///< Size of the sampler.
313
314 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
315 bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
316 bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
317 bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
318 bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
319 bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
320 bool is_separated = false; ///< Whether the image and sampler are separate or not.
321};
322
323 /// Represents a tracked indexed (array) bindless sampler into a direct const buffer
324struct ArraySamplerNode {
325 u32 index;
326 u32 base_offset;
327 u32 bindless_var;
328};
329
330/// Represents a tracked separate sampler image pair that was folded statically
331struct SeparateSamplerNode {
332 std::pair<u32, u32> indices;
333 std::pair<u32, u32> offsets;
334};
335
336/// Represents a tracked bindless sampler into a direct const buffer
337struct BindlessSamplerNode {
338 u32 index;
339 u32 offset;
340};
341
342struct ImageEntry {
343public:
344 /// Bound images constructor
345 explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
346 : index{index_}, offset{offset_}, type{type_} {}
347
348 /// Bindless samplers constructor
349 explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
350 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
351
352 void MarkWrite() {
353 is_written = true;
354 }
355
356 void MarkRead() {
357 is_read = true;
358 }
359
360 void MarkAtomic() {
361 MarkWrite();
362 MarkRead();
363 is_atomic = true;
364 }
365
366 u32 index = 0;
367 u32 offset = 0;
368 u32 buffer = 0;
369
370 Tegra::Shader::ImageType type{};
371 bool is_bindless = false;
372 bool is_written = false;
373 bool is_read = false;
374 bool is_atomic = false;
375};
376
377struct GlobalMemoryBase {
378 u32 cbuf_index = 0;
379 u32 cbuf_offset = 0;
380
381 [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const {
382 return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
383 }
384};
385
386/// Parameters describing an arithmetic operation
387struct MetaArithmetic {
388 bool precise{}; ///< Whether the operation must stay precise (i.e. cannot be relaxed)
389};
390
391/// Parameters describing a texture sampler
392struct MetaTexture {
393 SamplerEntry sampler;
394 Node array;
395 Node depth_compare;
396 std::vector<Node> aoffi;
397 std::vector<Node> ptp;
398 std::vector<Node> derivates;
399 Node bias;
400 Node lod;
401 Node component;
402 u32 element{};
403 Node index;
404};
405
406struct MetaImage {
407 const ImageEntry& image;
408 std::vector<Node> values;
409 u32 element{};
410};
411
412/// Parameters that modify an operation but are not part of any particular operand
413using Meta =
414 std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
415
416class AmendNode {
417public:
418 [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const {
419 if (amend_index == amend_null_index) {
420 return std::nullopt;
421 }
422 return {amend_index};
423 }
424
425 void SetAmendIndex(std::size_t index) {
426 amend_index = index;
427 }
428
429 void ClearAmend() {
430 amend_index = amend_null_index;
431 }
432
433private:
434 static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
435 std::size_t amend_index{amend_null_index};
436};
437
438/// Holds any kind of operation that can be done in the IR
439class OperationNode final : public AmendNode {
440public:
441 explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {}
442
443 explicit OperationNode(OperationCode code_, Meta meta_)
444 : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {}
445
446 explicit OperationNode(OperationCode code_, std::vector<Node> operands_)
447 : OperationNode(code_, Meta{}, std::move(operands_)) {}
448
449 explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_)
450 : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {}
451
452 template <typename... Args>
453 explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_)
454 : code{code_}, meta{std::move(meta_)}, operands{operands_...} {}
455
456 [[nodiscard]] OperationCode GetCode() const {
457 return code;
458 }
459
460 [[nodiscard]] const Meta& GetMeta() const {
461 return meta;
462 }
463
464 [[nodiscard]] std::size_t GetOperandsCount() const {
465 return operands.size();
466 }
467
468 [[nodiscard]] const Node& operator[](std::size_t operand_index) const {
469 return operands.at(operand_index);
470 }
471
472private:
473 OperationCode code{};
474 Meta meta{};
475 std::vector<Node> operands;
476};
477
478 /// Encloses code that is executed conditionally on a boolean condition node
479class ConditionalNode final : public AmendNode {
480public:
481 explicit ConditionalNode(Node condition_, std::vector<Node>&& code_)
482 : condition{std::move(condition_)}, code{std::move(code_)} {}
483
484 [[nodiscard]] const Node& GetCondition() const {
485 return condition;
486 }
487
488 [[nodiscard]] const std::vector<Node>& GetCode() const {
489 return code;
490 }
491
492private:
493 Node condition; ///< Condition to be satisfied
494 std::vector<Node> code; ///< Code to execute
495};
496
497 /// A general-purpose register
498class GprNode final {
499public:
500 explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {}
501
502 [[nodiscard]] constexpr u32 GetIndex() const {
503 return static_cast<u32>(index);
504 }
505
506private:
507 Tegra::Shader::Register index{};
508};
509
510/// A custom variable
511class CustomVarNode final {
512public:
513 explicit constexpr CustomVarNode(u32 index_) : index{index_} {}
514
515 [[nodiscard]] constexpr u32 GetIndex() const {
516 return index;
517 }
518
519private:
520 u32 index{};
521};
522
523 /// A 32-bit immediate value
524class ImmediateNode final {
525public:
526 explicit constexpr ImmediateNode(u32 value_) : value{value_} {}
527
528 [[nodiscard]] constexpr u32 GetValue() const {
529 return value;
530 }
531
532private:
533 u32 value{};
534};
535
536/// One of Maxwell's internal flags
537class InternalFlagNode final {
538public:
539 explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {}
540
541 [[nodiscard]] constexpr InternalFlag GetFlag() const {
542 return flag;
543 }
544
545private:
546 InternalFlag flag{};
547};
548
549 /// A predicate register; it can be negated without additional nodes
550class PredicateNode final {
551public:
552 explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_)
553 : index{index_}, negated{negated_} {}
554
555 [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const {
556 return index;
557 }
558
559 [[nodiscard]] constexpr bool IsNegated() const {
560 return negated;
561 }
562
563private:
564 Tegra::Shader::Pred index{};
565 bool negated{};
566};
567
568/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
569class AbufNode final {
570public:
571 // Initialize for standard attributes (index is explicit).
572 explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {})
573 : buffer{std::move(buffer_)}, index{index_}, element{element_} {}
574
575 // Initialize for physical attributes (index is a variable value).
576 explicit AbufNode(Node physical_address_, Node buffer_ = {})
577 : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {}
578
579 [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const {
580 return index;
581 }
582
583 [[nodiscard]] u32 GetElement() const {
584 return element;
585 }
586
587 [[nodiscard]] const Node& GetBuffer() const {
588 return buffer;
589 }
590
591 [[nodiscard]] bool IsPhysicalBuffer() const {
592 return static_cast<bool>(physical_address);
593 }
594
595 [[nodiscard]] const Node& GetPhysicalAddress() const {
596 return physical_address;
597 }
598
599private:
600 Node physical_address;
601 Node buffer;
602 Tegra::Shader::Attribute::Index index{};
603 u32 element{};
604};
605
606 /// Patch memory (used to communicate between tessellation stages).
607class PatchNode final {
608public:
609 explicit constexpr PatchNode(u32 offset_) : offset{offset_} {}
610
611 [[nodiscard]] constexpr u32 GetOffset() const {
612 return offset;
613 }
614
615private:
616 u32 offset{};
617};
618
619/// Constant buffer node, usually mapped to uniform buffers in GLSL
620class CbufNode final {
621public:
622 explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {}
623
624 [[nodiscard]] u32 GetIndex() const {
625 return index;
626 }
627
628 [[nodiscard]] const Node& GetOffset() const {
629 return offset;
630 }
631
632private:
633 u32 index{};
634 Node offset;
635};
636
637/// Local memory node
638class LmemNode final {
639public:
640 explicit LmemNode(Node address_) : address{std::move(address_)} {}
641
642 [[nodiscard]] const Node& GetAddress() const {
643 return address;
644 }
645
646private:
647 Node address;
648};
649
650/// Shared memory node
651class SmemNode final {
652public:
653 explicit SmemNode(Node address_) : address{std::move(address_)} {}
654
655 [[nodiscard]] const Node& GetAddress() const {
656 return address;
657 }
658
659private:
660 Node address;
661};
662
663/// Global memory node
664class GmemNode final {
665public:
666 explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_)
667 : real_address{std::move(real_address_)}, base_address{std::move(base_address_)},
668 descriptor{descriptor_} {}
669
670 [[nodiscard]] const Node& GetRealAddress() const {
671 return real_address;
672 }
673
674 [[nodiscard]] const Node& GetBaseAddress() const {
675 return base_address;
676 }
677
678 [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const {
679 return descriptor;
680 }
681
682private:
683 Node real_address;
684 Node base_address;
685 GlobalMemoryBase descriptor;
686};
687
688 /// Commentary node; it can be dropped without affecting code generation
689class CommentNode final {
690public:
691 explicit CommentNode(std::string text_) : text{std::move(text_)} {}
692
693 [[nodiscard]] const std::string& GetText() const {
694 return text;
695 }
696
697private:
698 std::string text;
699};
700
701} // namespace VideoCommon::Shader
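
Note: every backend consumes this IR by pattern-matching on the NodeData variant. A minimal illustrative visitor, assuming the declarations above (the classifier is not part of the codebase):

#include <string_view>
#include <type_traits>
#include <variant>

// Illustrative: name the alternative a Node currently holds.
std::string_view NodeKind(const Node& node) {
    return std::visit(
        [](const auto& data) -> std::string_view {
            using T = std::decay_t<decltype(data)>;
            if constexpr (std::is_same_v<T, OperationNode>) {
                return "operation";
            } else if constexpr (std::is_same_v<T, GprNode>) {
                return "gpr";
            } else if constexpr (std::is_same_v<T, ImmediateNode>) {
                return "immediate";
            } else {
                return "other";
            }
        },
        *node);
}
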
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
deleted file mode 100644
index 6a5b6940d..000000000
--- a/src/video_core/shader/node_helper.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <vector>
7
8#include "common/common_types.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14Node Conditional(Node condition, std::vector<Node> code) {
15 return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
16}
17
18Node Comment(std::string text) {
19 return MakeNode<CommentNode>(std::move(text));
20}
21
22Node Immediate(u32 value) {
23 return MakeNode<ImmediateNode>(value);
24}
25
26Node Immediate(s32 value) {
27 return Immediate(static_cast<u32>(value));
28}
29
30Node Immediate(f32 value) {
31 u32 integral;
32 std::memcpy(&integral, &value, sizeof(u32));
33 return Immediate(integral);
34}
35
36OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) {
37 if (is_signed) {
38 return operation_code;
39 }
40 switch (operation_code) {
41 case OperationCode::FCastInteger:
42 return OperationCode::FCastUInteger;
43 case OperationCode::IAdd:
44 return OperationCode::UAdd;
45 case OperationCode::IMul:
46 return OperationCode::UMul;
47 case OperationCode::IDiv:
48 return OperationCode::UDiv;
49 case OperationCode::IMin:
50 return OperationCode::UMin;
51 case OperationCode::IMax:
52 return OperationCode::UMax;
53 case OperationCode::ICastFloat:
54 return OperationCode::UCastFloat;
55 case OperationCode::ICastUnsigned:
56 return OperationCode::UCastSigned;
57 case OperationCode::ILogicalShiftLeft:
58 return OperationCode::ULogicalShiftLeft;
59 case OperationCode::ILogicalShiftRight:
60 return OperationCode::ULogicalShiftRight;
61 case OperationCode::IArithmeticShiftRight:
62 return OperationCode::UArithmeticShiftRight;
63 case OperationCode::IBitwiseAnd:
64 return OperationCode::UBitwiseAnd;
65 case OperationCode::IBitwiseOr:
66 return OperationCode::UBitwiseOr;
67 case OperationCode::IBitwiseXor:
68 return OperationCode::UBitwiseXor;
69 case OperationCode::IBitwiseNot:
70 return OperationCode::UBitwiseNot;
71 case OperationCode::IBitfieldExtract:
72 return OperationCode::UBitfieldExtract;
73 case OperationCode::IBitfieldInsert:
74 return OperationCode::UBitfieldInsert;
75 case OperationCode::IBitCount:
76 return OperationCode::UBitCount;
77 case OperationCode::LogicalILessThan:
78 return OperationCode::LogicalULessThan;
79 case OperationCode::LogicalIEqual:
80 return OperationCode::LogicalUEqual;
81 case OperationCode::LogicalILessEqual:
82 return OperationCode::LogicalULessEqual;
83 case OperationCode::LogicalIGreaterThan:
84 return OperationCode::LogicalUGreaterThan;
85 case OperationCode::LogicalINotEqual:
86 return OperationCode::LogicalUNotEqual;
87 case OperationCode::LogicalIGreaterEqual:
88 return OperationCode::LogicalUGreaterEqual;
89 case OperationCode::AtomicIExchange:
90 return OperationCode::AtomicUExchange;
91 case OperationCode::AtomicIAdd:
92 return OperationCode::AtomicUAdd;
93 case OperationCode::AtomicIMin:
94 return OperationCode::AtomicUMin;
95 case OperationCode::AtomicIMax:
96 return OperationCode::AtomicUMax;
97 case OperationCode::AtomicIAnd:
98 return OperationCode::AtomicUAnd;
99 case OperationCode::AtomicIOr:
100 return OperationCode::AtomicUOr;
101 case OperationCode::AtomicIXor:
102 return OperationCode::AtomicUXor;
103 case OperationCode::INegate:
104 UNREACHABLE_MSG("Can't negate an unsigned integer");
105 return {};
106 case OperationCode::IAbsolute:
107 UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
108 return {};
109 default:
110 UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
111 return {};
112 }
113}
114
115} // namespace VideoCommon::Shader
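
Note: the mapping above lets decoders resolve signedness once, at node-construction time, through SignedOperation (declared in node_helper.h below). A one-line illustrative wrapper:

#include <utility>

// Illustrative: is_signed == true emits IAdd, false emits UAdd.
Node AddRegisters(Node a, Node b, bool is_signed) {
    return SignedOperation(OperationCode::IAdd, is_signed, std::move(a), std::move(b));
}
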
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
deleted file mode 100644
index 1e0886185..000000000
--- a/src/video_core/shader/node_helper.h
+++ /dev/null
@@ -1,71 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <string>
9#include <tuple>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include "common/common_types.h"
15#include "video_core/shader/node.h"
16
17namespace VideoCommon::Shader {
18
19 /// This arithmetic operation must stay precise (it cannot be relaxed)
20 inline constexpr MetaArithmetic PRECISE = {true};
21 /// This arithmetic operation may be freely optimized
22 inline constexpr MetaArithmetic NO_PRECISE = {false};
23
24/// Creates a conditional node
25Node Conditional(Node condition, std::vector<Node> code);
26
27/// Creates a commentary node
28Node Comment(std::string text);
29
31 /// Creates a u32 immediate
32 Node Immediate(u32 value);
33
34 /// Creates an s32 immediate
35 Node Immediate(s32 value);
36
37 /// Creates an f32 immediate
37Node Immediate(f32 value);
38
40 /// Converts a signed operation code to its unsigned counterpart
40OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
41
42template <typename T, typename... Args>
43Node MakeNode(Args&&... args) {
44 static_assert(std::is_convertible_v<T, NodeData>);
45 return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
46}
47
48template <typename T, typename... Args>
49TrackSampler MakeTrackSampler(Args&&... args) {
50 static_assert(std::is_convertible_v<T, TrackSamplerData>);
51 return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
52}
53
54template <typename... Args>
55Node Operation(OperationCode code, Args&&... args) {
56 if constexpr (sizeof...(args) == 0) {
57 return MakeNode<OperationNode>(code);
58 } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>,
59 Meta>) {
60 return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
61 } else {
62 return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
63 }
64}
65
66template <typename... Args>
67Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
68 return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...);
69}
70
71} // namespace VideoCommon::Shader
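
Note: the if constexpr chain in Operation makes the Meta payload optional: if the first operand converts to Meta it becomes the node's metadata, otherwise a default Meta{} is used. Illustrative, assuming the helpers above:

#include <utility>

// Both build an FAdd node; only the first carries MetaArithmetic{true}.
Node PreciseAdd(Node a, Node b) {
    return Operation(OperationCode::FAdd, PRECISE, std::move(a), std::move(b));
}
Node FastAdd(Node a, Node b) {
    return Operation(OperationCode::FAdd, std::move(a), std::move(b));
}
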
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
deleted file mode 100644
index 148d91fcb..000000000
--- a/src/video_core/shader/registry.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "video_core/engines/kepler_compute.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/shader/registry.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Engines::ConstBufferEngineInterface;
18using Tegra::Engines::SamplerDescriptor;
19using Tegra::Engines::ShaderType;
20
21namespace {
22
23GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
24 if (shader_stage == ShaderType::Compute) {
25 return {};
26 }
27
28 auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
29
30 return {
31 .tfb_layouts = graphics.regs.tfb_layouts,
32 .tfb_varying_locs = graphics.regs.tfb_varying_locs,
33 .primitive_topology = graphics.regs.draw.topology,
34 .tessellation_primitive = graphics.regs.tess_mode.prim,
35 .tessellation_spacing = graphics.regs.tess_mode.spacing,
36 .tfb_enabled = graphics.regs.tfb_enabled != 0,
37 .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
38 };
39}
40
41ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
42 if (shader_stage != ShaderType::Compute) {
43 return {};
44 }
45
46 auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
47 const auto& launch = compute.launch_description;
48
49 return {
50 .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
51 .shared_memory_size_in_words = launch.shared_alloc,
52 .local_memory_size_in_words = launch.local_pos_alloc,
53 };
54}
55
56} // Anonymous namespace
57
58Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
59 : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
60 bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
61
62Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
63 : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
64 graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
65 shader_stage, engine_)} {}
66
67Registry::~Registry() = default;
68
69std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
70 const std::pair<u32, u32> key = {buffer, offset};
71 const auto iter = keys.find(key);
72 if (iter != keys.end()) {
73 return iter->second;
74 }
75 if (!engine) {
76 return std::nullopt;
77 }
78 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
79 keys.emplace(key, value);
80 return value;
81}
82
83std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
84 const u32 key = offset;
85 const auto iter = bound_samplers.find(key);
86 if (iter != bound_samplers.end()) {
87 return iter->second;
88 }
89 if (!engine) {
90 return std::nullopt;
91 }
92 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
93 bound_samplers.emplace(key, value);
94 return value;
95}
96
97std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
98 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
99 SeparateSamplerKey key;
100 key.buffers = buffers;
101 key.offsets = offsets;
102 const auto iter = separate_samplers.find(key);
103 if (iter != separate_samplers.end()) {
104 return iter->second;
105 }
106 if (!engine) {
107 return std::nullopt;
108 }
109
110 const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
111 const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
112 const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
113 separate_samplers.emplace(key, value);
114 return value;
115}
116
117std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
118 const std::pair key = {buffer, offset};
119 const auto iter = bindless_samplers.find(key);
120 if (iter != bindless_samplers.end()) {
121 return iter->second;
122 }
123 if (!engine) {
124 return std::nullopt;
125 }
126 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
127 bindless_samplers.emplace(key, value);
128 return value;
129}
130
131void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
132 keys.insert_or_assign({buffer, offset}, value);
133}
134
135void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
136 bound_samplers.insert_or_assign(offset, sampler);
137}
138
139void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
140 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
141}
142
143bool Registry::IsConsistent() const {
144 if (!engine) {
145 return true;
146 }
147 return std::all_of(keys.begin(), keys.end(),
148 [this](const auto& pair) {
149 const auto [cbuf, offset] = pair.first;
150 const auto value = pair.second;
151 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
152 }) &&
153 std::all_of(bound_samplers.begin(), bound_samplers.end(),
154 [this](const auto& sampler) {
155 const auto [key, value] = sampler;
156 return value == engine->AccessBoundSampler(stage, key);
157 }) &&
158 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
159 [this](const auto& sampler) {
160 const auto [cbuf, offset] = sampler.first;
161 const auto value = sampler.second;
162 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
163 });
164}
165
166bool Registry::HasEqualKeys(const Registry& rhs) const {
167 return std::tie(keys, bound_samplers, bindless_samplers) ==
168 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
169}
170
171const GraphicsInfo& Registry::GetGraphicsInfo() const {
172 ASSERT(stage != Tegra::Engines::ShaderType::Compute);
173 return graphics_info;
174}
175
176const ComputeInfo& Registry::GetComputeInfo() const {
177 ASSERT(stage == Tegra::Engines::ShaderType::Compute);
178 return compute_info;
179}
180
181} // namespace VideoCommon::Shader
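
The three Obtain functions above share one memoization pattern: serve a recorded value when
present, fall back to the engine when one is attached, and record the read so a disk shader
cache can later rebuild the same Registry without live GPU state (IsConsistent replays the
recorded reads for validation). A minimal self-contained sketch of that pattern, where
KeyCache and DataSource are illustrative stand-ins rather than names from the deleted code:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <optional>
    #include <unordered_map>
    #include <utility>

    using DataSource = std::function<std::uint32_t(std::uint32_t buffer, std::uint32_t offset)>;

    struct PairHash {
        std::size_t operator()(const std::pair<std::uint32_t, std::uint32_t>& p) const noexcept {
            return std::hash<std::uint64_t>{}((std::uint64_t{p.first} << 32) | p.second);
        }
    };

    class KeyCache {
    public:
        explicit KeyCache(DataSource source) : source_{std::move(source)} {}

        std::optional<std::uint32_t> Obtain(std::uint32_t buffer, std::uint32_t offset) {
            const auto key = std::make_pair(buffer, offset);
            if (const auto it = cache_.find(key); it != cache_.end()) {
                return it->second; // Serve the memoized value.
            }
            if (!source_) {
                return std::nullopt; // Deserialized registry: no engine left to query.
            }
            const std::uint32_t value = source_(buffer, offset);
            cache_.emplace(key, value); // Record the read for later serialization.
            return value;
        }

    private:
        DataSource source_;
        std::unordered_map<std::pair<std::uint32_t, std::uint32_t>, std::uint32_t, PairHash> cache_;
    };
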
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
deleted file mode 100644
index 4bebefdde..000000000
--- a/src/video_core/shader/registry.h
+++ /dev/null
@@ -1,172 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include "common/common_types.h"
14#include "common/hash.h"
15#include "video_core/engines/const_buffer_engine_interface.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h"
18#include "video_core/guest_driver.h"
19
20namespace VideoCommon::Shader {
21
22struct SeparateSamplerKey {
23 std::pair<u32, u32> buffers;
24 std::pair<u32, u32> offsets;
25};
26
27} // namespace VideoCommon::Shader
28
29namespace std {
30
31template <>
32struct hash<VideoCommon::Shader::SeparateSamplerKey> {
33 std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
34 return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
35 key.offsets.second);
36 }
37};
38
39template <>
40struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
41 bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
42 const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
43 return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
44 }
45};
46
47} // namespace std
48
49namespace VideoCommon::Shader {
50
51using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
52using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
53using SeparateSamplerMap =
54 std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
55using BindlessSamplerMap =
56 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
57
58struct GraphicsInfo {
59 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
60
61 std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
62 tfb_layouts{};
63 std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
64 Maxwell::PrimitiveTopology primitive_topology{};
65 Maxwell::TessellationPrimitive tessellation_primitive{};
66 Maxwell::TessellationSpacing tessellation_spacing{};
67 bool tfb_enabled = false;
68 bool tessellation_clockwise = false;
69};
70static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
71 std::is_standard_layout_v<GraphicsInfo>);
72
73struct ComputeInfo {
74 std::array<u32, 3> workgroup_size{};
75 u32 shared_memory_size_in_words = 0;
76 u32 local_memory_size_in_words = 0;
77};
78static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
79
80struct SerializedRegistryInfo {
81 VideoCore::GuestDriverProfile guest_driver_profile;
82 u32 bound_buffer = 0;
83 GraphicsInfo graphics;
84 ComputeInfo compute;
85};
86
87/**
88 * The Registry is a class used to interface the 3D and compute engines with the shader compiler.
89 * With it, the shader can obtain required data from GPU state and store it for disk shader
90 * compilation.
91 */
92class Registry {
93public:
94 explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
95
96 explicit Registry(Tegra::Engines::ShaderType shader_stage,
97 Tegra::Engines::ConstBufferEngineInterface& engine_);
98
99 ~Registry();
100
101 /// Retrieves a key from the registry. If it's registered, returns the registered value;
102 /// otherwise the value is read from Maxwell3D, registered, and then returned.
103 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
104
105 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
106
107 std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
108 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
109
110 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
111
112 /// Inserts a key.
113 void InsertKey(u32 buffer, u32 offset, u32 value);
114
115 /// Inserts a bound sampler key.
116 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
117
118 /// Inserts a bindless sampler key.
119 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
120
121 /// Checks keys and samplers against the engine's current const buffers.
122 /// Returns true if they hold the same values, false otherwise.
123 bool IsConsistent() const;
124
125 /// Returns true if the keys are equal to the other ones in the registry.
126 bool HasEqualKeys(const Registry& rhs) const;
127
128 /// Returns graphics information from this shader
129 const GraphicsInfo& GetGraphicsInfo() const;
130
131 /// Returns compute information from this shader
132 const ComputeInfo& GetComputeInfo() const;
133
134 /// Returns the const buffer keys in the database.
135 const KeyMap& GetKeys() const {
136 return keys;
137 }
138
139 /// Gets the bound samplers database.
140 const BoundSamplerMap& GetBoundSamplers() const {
141 return bound_samplers;
142 }
143
144 /// Gets the bindless samplers database.
145 const BindlessSamplerMap& GetBindlessSamplers() const {
146 return bindless_samplers;
147 }
148
149 /// Gets the bound buffer used by this shader.
150 u32 GetBoundBuffer() const {
151 return bound_buffer;
152 }
153
154 /// Obtains access to the guest driver's profile.
155 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
156 return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
157 }
158
159private:
160 const Tegra::Engines::ShaderType stage;
161 VideoCore::GuestDriverProfile stored_guest_driver_profile;
162 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
163 KeyMap keys;
164 BoundSamplerMap bound_samplers;
165 SeparateSamplerMap separate_samplers;
166 BindlessSamplerMap bindless_samplers;
167 u32 bound_buffer;
168 GraphicsInfo graphics_info;
169 ComputeInfo compute_info;
170};
171
172} // namespace VideoCommon::Shader
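
One detail in the header above deserves a note: the std::hash specialization folds all four
SeparateSamplerKey fields with XOR, which is cheap but collides whenever the fields are merely
permuted; the accompanying equal_to is what keeps such colliding keys from aliasing each other
in the map. A self-contained check of that property, with Key standing in for
SeparateSamplerKey:

    #include <cassert>
    #include <cstdint>
    #include <utility>

    struct Key {
        std::pair<std::uint32_t, std::uint32_t> buffers;
        std::pair<std::uint32_t, std::uint32_t> offsets;
    };

    static std::uint32_t Fold(const Key& k) {
        return k.buffers.first ^ k.buffers.second ^ k.offsets.first ^ k.offsets.second;
    }

    int main() {
        const Key a{{1, 2}, {3, 4}};
        const Key b{{2, 1}, {4, 3}}; // Same field values, permuted.
        assert(Fold(a) == Fold(b));  // The hash inputs collide...
        assert(!(a.buffers == b.buffers && a.offsets == b.offsets)); // ...equality does not.
        return 0;
    }
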
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
deleted file mode 100644
index a4987ffc6..000000000
--- a/src/video_core/shader/shader_ir.cpp
+++ /dev/null
@@ -1,464 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cmath>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "common/logging/log.h"
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/node.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Attribute;
21using Tegra::Shader::Instruction;
22using Tegra::Shader::IpaMode;
23using Tegra::Shader::Pred;
24using Tegra::Shader::PredCondition;
25using Tegra::Shader::PredOperation;
26using Tegra::Shader::Register;
27
28ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_,
29 Registry& registry_)
30 : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{
31 registry_} {
32 Decode();
33 PostDecode();
34}
35
36ShaderIR::~ShaderIR() = default;
37
38Node ShaderIR::GetRegister(Register reg) {
39 if (reg != Register::ZeroIndex) {
40 used_registers.insert(static_cast<u32>(reg));
41 }
42 return MakeNode<GprNode>(reg);
43}
44
45Node ShaderIR::GetCustomVariable(u32 id) {
46 return MakeNode<CustomVarNode>(id);
47}
48
49Node ShaderIR::GetImmediate19(Instruction instr) {
50 return Immediate(instr.alu.GetImm20_19());
51}
52
53Node ShaderIR::GetImmediate32(Instruction instr) {
54 return Immediate(instr.alu.GetImm20_32());
55}
56
57Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
58 const auto index = static_cast<u32>(index_);
59 const auto offset = static_cast<u32>(offset_);
60
61 used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset);
62
63 return MakeNode<CbufNode>(index, Immediate(offset));
64}
65
66Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
67 const auto index = static_cast<u32>(index_);
68 const auto offset = static_cast<u32>(offset_);
69
70 used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect();
71
72 Node final_offset = [&] {
73 // Attempt to inline constant buffer without a variable offset. This is done to allow
74 // tracking LDC calls.
75 if (const auto gpr = std::get_if<GprNode>(&*node)) {
76 if (gpr->GetIndex() == Register::ZeroIndex) {
77 return Immediate(offset);
78 }
79 }
80 return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
81 }();
82 return MakeNode<CbufNode>(index, std::move(final_offset));
83}
84
85Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
86 const auto pred = static_cast<Pred>(pred_);
87 if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) {
88 used_predicates.insert(pred);
89 }
90
91 return MakeNode<PredicateNode>(pred, negated);
92}
93
94Node ShaderIR::GetPredicate(bool immediate) {
95 return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
96}
97
98Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
99 MarkAttributeUsage(index, element);
100 used_input_attributes.emplace(index);
101 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
102}
103
104Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
105 uses_physical_attributes = true;
106 return MakeNode<AbufNode>(GetRegister(physical_address), buffer);
107}
108
109Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
110 MarkAttributeUsage(index, element);
111 used_output_attributes.insert(index);
112 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
113}
114
115Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
116 Node node = MakeNode<InternalFlagNode>(flag);
117 if (negated) {
118 return Operation(OperationCode::LogicalNegate, std::move(node));
119 }
120 return node;
121}
122
123Node ShaderIR::GetLocalMemory(Node address) {
124 return MakeNode<LmemNode>(std::move(address));
125}
126
127Node ShaderIR::GetSharedMemory(Node address) {
128 return MakeNode<SmemNode>(std::move(address));
129}
130
131Node ShaderIR::GetTemporary(u32 id) {
132 return GetRegister(Register::ZeroIndex + 1 + id);
133}
134
135Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
136 if (absolute) {
137 value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
138 }
139 if (negate) {
140 value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
141 }
142 return value;
143}
144
145Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
146 if (!saturate) {
147 return value;
148 }
149
150 Node positive_zero = Immediate(std::copysignf(0, 1));
151 Node positive_one = Immediate(1.0f);
152 return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
153 std::move(positive_one));
154}
155
156Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
157 switch (size) {
158 case Register::Size::Byte:
159 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
160 std::move(value), Immediate(24));
161 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
162 std::move(value), Immediate(24));
163 return value;
164 case Register::Size::Short:
165 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
166 std::move(value), Immediate(16));
167 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
168 std::move(value), Immediate(16));
169 return value;
170 case Register::Size::Word:
171 // Default - do nothing
172 return value;
173 default:
174 UNREACHABLE_MSG("Unimplemented conversion size: {}", size);
175 return value;
176 }
177}
178
179Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
180 if (!is_signed) {
181 // Absolute or negate on an unsigned is pointless
182 return value;
183 }
184 if (absolute) {
185 value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
186 }
187 if (negate) {
188 value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
189 }
190 return value;
191}
192
193Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
194 Node value = Immediate(instr.half_imm.PackImmediates());
195 if (!has_negation) {
196 return value;
197 }
198
199 Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
200 Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
201
202 return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
203 std::move(second_negate));
204}
205
206Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
207 return Operation(OperationCode::HUnpack, type, std::move(value));
208}
209
210Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
211 switch (merge) {
212 case Tegra::Shader::HalfMerge::H0_H1:
213 return src;
214 case Tegra::Shader::HalfMerge::F32:
215 return Operation(OperationCode::HMergeF32, std::move(src));
216 case Tegra::Shader::HalfMerge::Mrg_H0:
217 return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
218 case Tegra::Shader::HalfMerge::Mrg_H1:
219 return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
220 }
221 UNREACHABLE();
222 return src;
223}
224
225Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
226 if (absolute) {
227 value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
228 }
229 if (negate) {
230 value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
231 GetPredicate(true));
232 }
233 return value;
234}
235
236Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
237 if (!saturate) {
238 return value;
239 }
240
241 Node positive_zero = Immediate(std::copysignf(0, 1));
242 Node positive_one = Immediate(1.0f);
243 return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
244 std::move(positive_one));
245}
246
247Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
248 if (condition == PredCondition::T) {
249 return GetPredicate(true);
250 } else if (condition == PredCondition::F) {
251 return GetPredicate(false);
252 }
253
254 static constexpr std::array comparison_table{
255 OperationCode(0),
256 OperationCode::LogicalFOrdLessThan, // LT
257 OperationCode::LogicalFOrdEqual, // EQ
258 OperationCode::LogicalFOrdLessEqual, // LE
259 OperationCode::LogicalFOrdGreaterThan, // GT
260 OperationCode::LogicalFOrdNotEqual, // NE
261 OperationCode::LogicalFOrdGreaterEqual, // GE
262 OperationCode::LogicalFOrdered, // NUM
263 OperationCode::LogicalFUnordered, // NAN
264 OperationCode::LogicalFUnordLessThan, // LTU
265 OperationCode::LogicalFUnordEqual, // EQU
266 OperationCode::LogicalFUnordLessEqual, // LEU
267 OperationCode::LogicalFUnordGreaterThan, // GTU
268 OperationCode::LogicalFUnordNotEqual, // NEU
269 OperationCode::LogicalFUnordGreaterEqual, // GEU
270 };
271 const std::size_t index = static_cast<std::size_t>(condition);
272 ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
273
274 return Operation(comparison_table[index], op_a, op_b);
275}
276
277Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
278 Node op_b) {
279 static constexpr std::array comparison_table{
280 std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
281 std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
282 std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
283 std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
284 std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
285 std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
286 };
287
288 const auto comparison =
289 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
290 [condition](const auto entry) { return condition == entry.first; });
291 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
292 "Unknown predicate comparison operation");
293
294 return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
295 std::move(op_b));
296}
297
298Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
299 Node op_b) {
300 static constexpr std::array comparison_table{
301 std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
302 std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
303 std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
304 std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
305 std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
306 std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
307 std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
308 std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
309 std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
310 std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
311 std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
312 };
313
314 const auto comparison =
315 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
316 [condition](const auto entry) { return condition == entry.first; });
317 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
318 "Unknown predicate comparison operation");
319
320 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
321}
322
323OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
324 static constexpr std::array operation_table{
325 OperationCode::LogicalAnd,
326 OperationCode::LogicalOr,
327 OperationCode::LogicalXor,
328 };
329
330 const auto index = static_cast<std::size_t>(operation);
331 if (index >= operation_table.size()) {
332 UNIMPLEMENTED_MSG("Unknown predicate operation.");
333 return {};
334 }
335
336 return operation_table[index];
337}
338
339Node ShaderIR::GetConditionCode(ConditionCode cc) const {
340 switch (cc) {
341 case ConditionCode::NEU:
342 return GetInternalFlag(InternalFlag::Zero, true);
343 case ConditionCode::FCSM_TR:
344 UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
345 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
346 default:
347 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc);
348 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
349 }
350}
351
352void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
353 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
354}
355
356void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
357 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
358}
359
360void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
361 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
362}
363
364void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
365 bb.push_back(
366 Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
367}
368
369void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) {
370 bb.push_back(
371 Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value)));
372}
373
374void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
375 SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
376}
377
378void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
379 if (!sets_cc) {
380 return;
381 }
382 Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
383 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
384 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
385}
386
387void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
388 if (!sets_cc) {
389 return;
390 }
391 Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
392 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
393 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
394}
395
396Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
397 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
398 Immediate(offset), Immediate(bits));
399}
400
401Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
402 return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset),
403 Immediate(bits));
404}
405
406void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
407 switch (index) {
408 case Attribute::Index::LayerViewportPointSize:
409 switch (element) {
410 case 0:
411 UNIMPLEMENTED();
412 break;
413 case 1:
414 uses_layer = true;
415 break;
416 case 2:
417 uses_viewport_index = true;
418 break;
419 case 3:
420 uses_point_size = true;
421 break;
422 }
423 break;
424 case Attribute::Index::TessCoordInstanceIDVertexID:
425 switch (element) {
426 case 2:
427 uses_instance_id = true;
428 break;
429 case 3:
430 uses_vertex_id = true;
431 break;
432 }
433 break;
434 case Attribute::Index::ClipDistances0123:
435 case Attribute::Index::ClipDistances4567: {
436 const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element;
437 used_clip_distances.at(clip_index) = true;
438 break;
439 }
440 case Attribute::Index::FrontColor:
441 case Attribute::Index::FrontSecondaryColor:
442 case Attribute::Index::BackColor:
443 case Attribute::Index::BackSecondaryColor:
444 uses_legacy_varyings = true;
445 break;
446 default:
447 if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) {
448 uses_legacy_varyings = true;
449 }
450 break;
451 }
452}
453
454std::size_t ShaderIR::DeclareAmend(Node new_amend) {
455 const auto id = amend_code.size();
456 amend_code.push_back(std::move(new_amend));
457 return id;
458}
459
460u32 ShaderIR::NewCustomVariable() {
461 return num_custom_variables++;
462}
463
464} // namespace VideoCommon::Shader
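
ConvertIntegerSize above uses the classic shift-pair idiom to extend narrow values: shift the
byte or short to the top of the 32-bit word, then shift back with an arithmetic right shift so
the sign bit is replicated. A scalar sketch of the signed Byte case, assuming the
two's-complement targets the emulator runs on:

    #include <cstdint>
    #include <cstdio>

    // Left-shift to the top of the word, then arithmetic-right-shift back down.
    // The cast to a signed type makes the right shift replicate bit 7.
    static std::int32_t SignExtendByte(std::uint32_t value) {
        return static_cast<std::int32_t>(value << 24) >> 24;
    }

    int main() {
        std::printf("%d\n", SignExtendByte(0xFF)); // Prints -1
        std::printf("%d\n", SignExtendByte(0x7F)); // Prints 127
        return 0;
    }
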
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
deleted file mode 100644
index 1cd7c14d7..000000000
--- a/src/video_core/shader/shader_ir.h
+++ /dev/null
@@ -1,479 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <list>
9#include <map>
10#include <optional>
11#include <set>
12#include <tuple>
13#include <vector>
14
15#include "common/common_types.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_bytecode.h"
18#include "video_core/engines/shader_header.h"
19#include "video_core/shader/ast.h"
20#include "video_core/shader/compiler_settings.h"
21#include "video_core/shader/memory_util.h"
22#include "video_core/shader/node.h"
23#include "video_core/shader/registry.h"
24
25namespace VideoCommon::Shader {
26
27struct ShaderBlock;
28
29constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
30
31struct ConstBuffer {
32 constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
33 : max_offset{max_offset_}, is_indirect{is_indirect_} {}
34
35 constexpr ConstBuffer() = default;
36
37 void MarkAsUsed(u64 offset) {
38 max_offset = std::max(max_offset, static_cast<u32>(offset));
39 }
40
41 void MarkAsUsedIndirect() {
42 is_indirect = true;
43 }
44
45 bool IsIndirect() const {
46 return is_indirect;
47 }
48
49 u32 GetSize() const {
50 return max_offset + static_cast<u32>(sizeof(float));
51 }
52
53 u32 GetMaxOffset() const {
54 return max_offset;
55 }
56
57private:
58 u32 max_offset = 0;
59 bool is_indirect = false;
60};
61
62struct GlobalMemoryUsage {
63 bool is_read{};
64 bool is_written{};
65};
66
67class ShaderIR final {
68public:
69 explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
70 CompilerSettings settings_, Registry& registry_);
71 ~ShaderIR();
72
73 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
74 return basic_blocks;
75 }
76
77 const std::set<u32>& GetRegisters() const {
78 return used_registers;
79 }
80
81 const std::set<Tegra::Shader::Pred>& GetPredicates() const {
82 return used_predicates;
83 }
84
85 const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
86 return used_input_attributes;
87 }
88
89 const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
90 return used_output_attributes;
91 }
92
93 const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
94 return used_cbufs;
95 }
96
97 const std::list<SamplerEntry>& GetSamplers() const {
98 return used_samplers;
99 }
100
101 const std::list<ImageEntry>& GetImages() const {
102 return used_images;
103 }
104
105 const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
106 const {
107 return used_clip_distances;
108 }
109
110 const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
111 return used_global_memory;
112 }
113
114 std::size_t GetLength() const {
115 return static_cast<std::size_t>(coverage_end * sizeof(u64));
116 }
117
118 bool UsesLayer() const {
119 return uses_layer;
120 }
121
122 bool UsesViewportIndex() const {
123 return uses_viewport_index;
124 }
125
126 bool UsesPointSize() const {
127 return uses_point_size;
128 }
129
130 bool UsesInstanceId() const {
131 return uses_instance_id;
132 }
133
134 bool UsesVertexId() const {
135 return uses_vertex_id;
136 }
137
138 bool UsesLegacyVaryings() const {
139 return uses_legacy_varyings;
140 }
141
142 bool UsesYNegate() const {
143 return uses_y_negate;
144 }
145
146 bool UsesWarps() const {
147 return uses_warps;
148 }
149
150 bool HasPhysicalAttributes() const {
151 return uses_physical_attributes;
152 }
153
154 const Tegra::Shader::Header& GetHeader() const {
155 return header;
156 }
157
158 bool IsFlowStackDisabled() const {
159 return disable_flow_stack;
160 }
161
162 bool IsDecompiled() const {
163 return decompiled;
164 }
165
166 const ASTManager& GetASTManager() const {
167 return program_manager;
168 }
169
170 ASTNode GetASTProgram() const {
171 return program_manager.GetProgram();
172 }
173
174 u32 GetASTNumVariables() const {
175 return program_manager.GetVariables();
176 }
177
178 u32 ConvertAddressToNvidiaSpace(u32 address) const {
179 return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
180 }
181
182 /// Returns a condition code evaluated from internal flags
183 Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
184
185 const Node& GetAmendNode(std::size_t index) const {
186 return amend_code[index];
187 }
188
189 u32 GetNumCustomVariables() const {
190 return num_custom_variables;
191 }
192
193private:
194 friend class ASTDecoder;
195
196 struct SamplerInfo {
197 std::optional<Tegra::Shader::TextureType> type;
198 std::optional<bool> is_array;
199 std::optional<bool> is_shadow;
200 std::optional<bool> is_buffer;
201
202 constexpr bool IsComplete() const noexcept {
203 return type && is_array && is_shadow && is_buffer;
204 }
205 };
206
207 void Decode();
208 void PostDecode();
209
210 NodeBlock DecodeRange(u32 begin, u32 end);
211 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
212 void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
213
214 /**
215 * Decodes a single instruction from Tegra to IR.
216 * @param bb Basic block where the nodes will be written to.
217 * @param pc Program counter. Offset to decode.
218 * @return Next address to decode.
219 */
220 u32 DecodeInstr(NodeBlock& bb, u32 pc);
221
222 u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
223 u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
224 u32 DecodeBfe(NodeBlock& bb, u32 pc);
225 u32 DecodeBfi(NodeBlock& bb, u32 pc);
226 u32 DecodeShift(NodeBlock& bb, u32 pc);
227 u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
228 u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
229 u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
230 u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
231 u32 DecodeFfma(NodeBlock& bb, u32 pc);
232 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
233 u32 DecodeConversion(NodeBlock& bb, u32 pc);
234 u32 DecodeWarp(NodeBlock& bb, u32 pc);
235 u32 DecodeMemory(NodeBlock& bb, u32 pc);
236 u32 DecodeTexture(NodeBlock& bb, u32 pc);
237 u32 DecodeImage(NodeBlock& bb, u32 pc);
238 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
239 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
240 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
241 u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
242 u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
243 u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
244 u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
245 u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
246 u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
247 u32 DecodeVideo(NodeBlock& bb, u32 pc);
248 u32 DecodeXmad(NodeBlock& bb, u32 pc);
249 u32 DecodeOther(NodeBlock& bb, u32 pc);
250
251 /// Generates a node for a passed register.
252 Node GetRegister(Tegra::Shader::Register reg);
253 /// Generates a node for a custom variable
254 Node GetCustomVariable(u32 id);
255 /// Generates a node representing a 19-bit immediate value
256 Node GetImmediate19(Tegra::Shader::Instruction instr);
257 /// Generates a node representing a 32-bit immediate value
258 Node GetImmediate32(Tegra::Shader::Instruction instr);
259 /// Generates a node representing a constant buffer
260 Node GetConstBuffer(u64 index, u64 offset);
261 /// Generates a node representing a constant buffer with a variable offset
262 Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
263 /// Generates a node for a passed predicate. It can be optionally negated
264 Node GetPredicate(u64 pred, bool negated = false);
265 /// Generates a predicate node for an immediate true or false value
266 Node GetPredicate(bool immediate);
267 /// Generates a node representing an input attribute. Keeps track of used attributes.
268 Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
269 /// Generates a node representing a physical input attribute.
270 Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
271 /// Generates a node representing an output attribute. Keeps track of used attributes.
272 Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
273 /// Generates a node representing an internal flag
274 Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
275 /// Generates a node representing a local memory address
276 Node GetLocalMemory(Node address);
277 /// Generates a node representing a shared memory address
278 Node GetSharedMemory(Node address);
279 /// Generates a temporary, internally it uses a post-RZ register
280 Node GetTemporary(u32 id);
281
282 /// Sets a register. src value must be a number-evaluated node.
283 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
284 /// Sets a predicate. src value must be a bool-evaluated node
285 void SetPredicate(NodeBlock& bb, u64 dest, Node src);
286 /// Sets an internal flag. src value must be a bool-evaluated node
287 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
288 /// Sets a local memory address with a value.
289 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
290 /// Sets a shared memory address with a value.
291 void SetSharedMemory(NodeBlock& bb, Node address, Node value);
292 /// Sets a temporary. Internally it uses a post-RZ register
293 void SetTemporary(NodeBlock& bb, u32 id, Node value);
294
295 /// Sets internal flags from a float
296 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
297 /// Sets internal flags from an integer
298 void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
299
300 /// Conditionally absolute/negated float. Absolute is applied first
301 Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
302 /// Conditionally saturates a float
303 Node GetSaturatedFloat(Node value, bool saturate = true);
304
305 /// Converts an integer to different sizes.
306 Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
307 /// Conditionally absolute/negated integer. Absolute is applied first
308 Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
309
310 /// Unpacks a half immediate from an instruction
311 Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
312 /// Unpacks a binary value into a half float pair with a type format
313 Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
314 /// Merges a half pair into another value
315 Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
316 /// Conditionally absolute/negated half float pair. Absolute is applied first
317 Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
318 /// Conditionally saturates a half float pair
319 Node GetSaturatedHalfFloat(Node value, bool saturate = true);
320
321 /// Get image component value by type and size
322 std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
323 u32 component_size, Node original_value);
324
325 /// Returns a predicate comparing two floats
326 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
327 /// Returns a predicate comparing two integers
328 Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
329 Node op_a, Node op_b);
330 /// Returns a predicate comparing two half floats. The metadata determines how both pairs are compared.
331 Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
332
333 /// Returns a predicate combiner operation
334 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
335
336 /// Queries the missing sampler info from the execution context.
337 SamplerInfo GetSamplerInfo(SamplerInfo info,
338 std::optional<Tegra::Engines::SamplerDescriptor> sampler);
339
340 /// Accesses a texture sampler.
341 std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
342
343 /// Accesses a texture sampler for a bindless texture.
344 std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
345 Node& index_var);
346
347 /// Accesses an image.
348 ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
349
350 /// Access a bindless image sampler.
351 ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
352
353 /// Extracts a sequence of bits from a node
354 Node BitfieldExtract(Node value, u32 offset, u32 bits);
355
356 /// Inserts a sequence of bits from a node
357 Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
358
359 /// Marks the usage of an input or output attribute.
360 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
361
362 /// Decodes VMNMX instruction and inserts its code into the passed basic block.
363 void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
364
365 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
366 const Node4& components);
367
368 void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
369 const Node4& components, bool ignore_mask = false);
370 void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
371 const Node4& components, bool ignore_mask = false);
372
373 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
374 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
375 bool is_array, bool is_aoffi,
376 std::optional<Tegra::Shader::Register> bindless_reg);
377
378 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
379 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
380 bool is_array);
381
382 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
383 bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
384 bool is_bindless);
385
386 Node4 GetTldCode(Tegra::Shader::Instruction instr);
387
388 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
389 bool is_array);
390
391 std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
392 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
393 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
394
395 std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
396
397 std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
398
399 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
400 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
401 Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
402 std::optional<Tegra::Shader::Register> bindless_reg);
403
404 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
405 u64 byte_height);
406
407 void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
408 Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
409 Tegra::Shader::PredicateResultMode predicate_mode,
410 Tegra::Shader::Pred predicate, bool sets_cc);
411 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
412 Node op_c, Node imm_lut, bool sets_cc);
413
414 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
415
416 std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
417 s64 cursor);
418
419 std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
420 const OperationNode& operation,
421 Node gpr, Node base_offset,
422 Node tracked, const NodeBlock& code,
423 s64 cursor);
424
425 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
426
427 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
428 s64 cursor) const;
429
430 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
431 Tegra::Shader::Instruction instr,
432 bool is_read, bool is_write);
433
434 /// Registers new amending code and obtains the reference id.
435 std::size_t DeclareAmend(Node new_amend);
436
437 u32 NewCustomVariable();
438
439 const ProgramCode& program_code;
440 const u32 main_offset;
441 const CompilerSettings settings;
442 Registry& registry;
443
444 bool decompiled{};
445 bool disable_flow_stack{};
446
447 u32 coverage_begin{};
448 u32 coverage_end{};
449
450 std::map<u32, NodeBlock> basic_blocks;
451 NodeBlock global_code;
452 ASTManager program_manager{true, true};
453 std::vector<Node> amend_code;
454 u32 num_custom_variables{};
455
456 std::set<u32> used_registers;
457 std::set<Tegra::Shader::Pred> used_predicates;
458 std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
459 std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
460 std::map<u32, ConstBuffer> used_cbufs;
461 std::list<SamplerEntry> used_samplers;
462 std::list<ImageEntry> used_images;
463 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
464 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
465 bool uses_layer{};
466 bool uses_viewport_index{};
467 bool uses_point_size{};
468 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
469 bool uses_instance_id{};
470 bool uses_vertex_id{};
471 bool uses_legacy_varyings{};
472 bool uses_y_negate{};
473 bool uses_warps{};
474 bool uses_indexed_samplers{};
475
476 Tegra::Shader::Header header;
477};
478
479} // namespace VideoCommon::Shader
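
The ConstBuffer helper in this header carries the size accounting for every const buffer a
shader touches: MarkAsUsed records only the highest offset read, and GetSize pads that by one
element so the backend can bind a large-enough range. The same bookkeeping as a
self-contained sketch:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    class ConstBufferUsage {
    public:
        void MarkAsUsed(std::uint32_t offset) {
            max_offset_ = std::max(max_offset_, offset); // Only the highest read matters.
        }

        std::uint32_t Size() const {
            return max_offset_ + static_cast<std::uint32_t>(sizeof(float)); // Cover the last element.
        }

    private:
        std::uint32_t max_offset_ = 0;
    };

    int main() {
        ConstBufferUsage usage;
        usage.MarkAsUsed(0x40);
        usage.MarkAsUsed(0x10);
        std::printf("0x%X\n", usage.Size()); // Prints 0x44
        return 0;
    }
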
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
deleted file mode 100644
index 6be3ea92b..000000000
--- a/src/video_core/shader/track.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7#include <variant>
8
9#include "common/common_types.h"
10#include "video_core/shader/node.h"
11#include "video_core/shader/node_helper.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16namespace {
17
18std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
19 OperationCode operation_code) {
20 for (; cursor >= 0; --cursor) {
21 Node node = code.at(cursor);
22
23 if (const auto operation = std::get_if<OperationNode>(&*node)) {
24 if (operation->GetCode() == operation_code) {
25 return {std::move(node), cursor};
26 }
27 }
28
29 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
30 const auto& conditional_code = conditional->GetCode();
31 auto result = FindOperation(
32 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
33 auto& found = result.first;
34 if (found) {
35 return {std::move(found), cursor};
36 }
37 }
38 }
39 return {};
40}
41
42std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
43 if (operation.GetCode() != OperationCode::UAdd) {
44 return std::nullopt;
45 }
46 Node gpr;
47 Node offset;
48 ASSERT(operation.GetOperandsCount() == 2);
49 for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
50 Node operand = operation[i];
51 if (std::holds_alternative<ImmediateNode>(*operand)) {
52 offset = operation[i];
53 } else if (std::holds_alternative<GprNode>(*operand)) {
54 gpr = operation[i];
55 }
56 }
57 if (offset && gpr) {
58 return std::make_pair(gpr, offset);
59 }
60 return std::nullopt;
61}
62
63bool AmendNodeCv(std::size_t amend_index, Node node) {
64 if (const auto operation = std::get_if<OperationNode>(&*node)) {
65 operation->SetAmendIndex(amend_index);
66 return true;
67 }
68 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
69 conditional->SetAmendIndex(amend_index);
70 return true;
71 }
72 return false;
73}
74
75} // Anonymous namespace
76
77std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
78 s64 cursor) {
79 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
80 const u32 cbuf_index = cbuf->GetIndex();
81
82 // Constant buffer found; test whether its offset is an immediate
83 const auto& offset = cbuf->GetOffset();
84 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
85 auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
86 return {tracked, track};
87 }
88 if (const auto operation = std::get_if<OperationNode>(&*offset)) {
89 const u32 bound_buffer = registry.GetBoundBuffer();
90 if (bound_buffer != cbuf_index) {
91 return {};
92 }
93 if (const std::optional pair = DecoupleIndirectRead(*operation)) {
94 auto [gpr, base_offset] = *pair;
95 return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
96 code, cursor);
97 }
98 }
99 return {};
100 }
101 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
102 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
103 return {};
104 }
105 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
106 // register that it uses as an operand
107 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
108 if (!source) {
109 return {};
110 }
111 return TrackBindlessSampler(source, code, new_cursor);
112 }
113 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
114 const OperationNode& op = *operation;
115
116 const OperationCode opcode = operation->GetCode();
117 if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
118 ASSERT(op.GetOperandsCount() == 2);
119 auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
120 auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
121 if (node_a && node_b) {
122 auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
123 std::pair{offset_a, offset_b});
124 return {tracked, std::move(track)};
125 }
126 }
127 std::size_t i = op.GetOperandsCount();
128 while (i--) {
129 if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) {
130 // Constant buffer found in operand.
131 return found;
132 }
133 }
134 return {};
135 }
136 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
137 const auto& conditional_code = conditional->GetCode();
138 return TrackBindlessSampler(tracked, conditional_code,
139 static_cast<s64>(conditional_code.size()));
140 }
141 return {};
142}
143
144std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
145 const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
146 const NodeBlock& code, s64 cursor) {
147 const auto offset_imm = std::get<ImmediateNode>(*base_offset);
148 const auto& gpu_driver = registry.AccessGuestDriverProfile();
149 const u32 bindless_cv = NewCustomVariable();
150 const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
151 Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
152
153 Node cv_node = GetCustomVariable(bindless_cv);
154 Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
155 const std::size_t amend_index = DeclareAmend(std::move(amend_op));
156 AmendNodeCv(amend_index, code[cursor]);
157
158 // TODO: Implement bindless index custom variable
159 auto track =
160 MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
161 return {tracked, track};
162}
163
164std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
165 s64 cursor) const {
166 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
167 // Constant buffer found; test whether its offset is an immediate
168 const auto& offset = cbuf->GetOffset();
169 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
170 return {tracked, cbuf->GetIndex(), immediate->GetValue()};
171 }
172 return {};
173 }
174 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
175 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
176 return {};
177 }
178 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
179 // register that it uses as an operand
180 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
181 if (!source) {
182 return {};
183 }
184 return TrackCbuf(source, code, new_cursor);
185 }
186 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
187 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
188 if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) {
189 // Cbuf found in operand.
190 return found;
191 }
192 }
193 return {};
194 }
195 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
196 const auto& conditional_code = conditional->GetCode();
197 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
198 }
199 return {};
200}
201
202std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
203 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same register
204 // that it uses as an operand
205 const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
206 const auto& found = result.first;
207 if (!found) {
208 return std::nullopt;
209 }
210 if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
211 return immediate->GetValue();
212 }
213 return std::nullopt;
214}
215
216std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
217 s64 cursor) const {
218 for (; cursor >= 0; --cursor) {
219 const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
220 if (!found_node) {
221 return {};
222 }
223 const auto operation = std::get_if<OperationNode>(&*found_node);
224 ASSERT(operation);
225
226 const auto& target = (*operation)[0];
227 if (const auto gpr_target = std::get_if<GprNode>(&*target)) {
228 if (gpr_target->GetIndex() == tracked->GetIndex()) {
229 return {(*operation)[1], new_cursor};
230 }
231 }
232 }
233 return {};
234}
235
236} // namespace VideoCommon::Shader
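
Every tracker in this file follows the same scheme: walk the basic block backwards from the
use site, find the last assignment to the register, and recurse into its source, starting the
cursor one instruction above the use so a self-assignment cannot recurse forever. A reduced
self-contained sketch over a flat assignment list (no conditionals or nested operations):

    #include <cstddef>
    #include <optional>
    #include <vector>

    struct Assign {
        unsigned dest_reg;
        std::optional<unsigned> immediate; // Set when the source is a constant.
    };

    static std::optional<unsigned> TrackImmediate(const std::vector<Assign>& code, unsigned reg,
                                                  std::ptrdiff_t cursor) {
        // Start one instruction above the use to avoid matching a self-assignment.
        for (--cursor; cursor >= 0; --cursor) {
            const Assign& assign = code[static_cast<std::size_t>(cursor)];
            if (assign.dest_reg == reg) {
                return assign.immediate; // Last write wins; earlier ones are shadowed.
            }
        }
        return std::nullopt;
    }

    int main() {
        const std::vector<Assign> code{{5, 0x1234}, {5, std::nullopt}, {7, 42}};
        const auto cursor = static_cast<std::ptrdiff_t>(code.size());
        // r7 resolves to the constant 42; r5 resolves to a non-immediate source
        // because the assignment at index 1 shadows the one at index 0.
        return TrackImmediate(code, 7, cursor) == 42 ? 0 : 1;
    }
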
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
deleted file mode 100644
index 22a933761..000000000
--- a/src/video_core/shader/transform_feedback.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/shader/registry.h"
13#include "video_core/shader/transform_feedback.h"
14
15namespace VideoCommon::Shader {
16
17namespace {
18
19using Maxwell = Tegra::Engines::Maxwell3D::Regs;
20
21// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
22
23/// Attribute offsets that describe a vector
24constexpr std::array VECTORS = {
25 28, // gl_Position
26 32, // Generic 0
27 36, // Generic 1
28 40, // Generic 2
29 44, // Generic 3
30 48, // Generic 4
31 52, // Generic 5
32 56, // Generic 6
33 60, // Generic 7
34 64, // Generic 8
35 68, // Generic 9
36 72, // Generic 10
37 76, // Generic 11
38 80, // Generic 12
39 84, // Generic 13
40 88, // Generic 14
41 92, // Generic 15
42 96, // Generic 16
43 100, // Generic 17
44 104, // Generic 18
45 108, // Generic 19
46 112, // Generic 20
47 116, // Generic 21
48 120, // Generic 22
49 124, // Generic 23
50 128, // Generic 24
51 132, // Generic 25
52 136, // Generic 26
53 140, // Generic 27
54 144, // Generic 28
55 148, // Generic 29
56 152, // Generic 30
57 156, // Generic 31
58 160, // gl_FrontColor
59 164, // gl_FrontSecondaryColor
60 160, // gl_BackColor
61 164, // gl_BackSecondaryColor
62 192, // gl_TexCoord[0]
63 196, // gl_TexCoord[1]
64 200, // gl_TexCoord[2]
65 204, // gl_TexCoord[3]
66 208, // gl_TexCoord[4]
67 212, // gl_TexCoord[5]
68 216, // gl_TexCoord[6]
69 220, // gl_TexCoord[7]
70};
71} // namespace
72
73std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
74
75 std::unordered_map<u8, VaryingTFB> tfb;
76
77 for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
78 const auto& locations = info.tfb_varying_locs[buffer];
79 const auto& layout = info.tfb_layouts[buffer];
80 const std::size_t varying_count = layout.varying_count;
81
82 std::size_t highest = 0;
83
84 for (std::size_t offset = 0; offset < varying_count; ++offset) {
85 const std::size_t base_offset = offset;
86 const u8 location = locations[offset];
87
88 VaryingTFB varying;
89 varying.buffer = layout.stream;
90 varying.stride = layout.stride;
91 varying.offset = offset * sizeof(u32);
92 varying.components = 1;
93
94 if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
95 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
96
97 const u8 base_index = location / 4;
98 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
99 ++offset;
100 ++varying.components;
101 }
102 }
103
104 [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
105 UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
106
107 highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
108 }
109
110 UNIMPLEMENTED_IF(highest != layout.stride);
111 }
112 return tfb;
113}
114
115} // namespace VideoCommon::Shader
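
The inner while loop above is what turns per-component transform feedback records into vector
varyings: consecutive locations sharing the same attribute index (location / 4) fold into a
single entry with a widened component count. A reduced sketch with locations 32..35, the four
components of generic attribute 0:

    #include <array>
    #include <cstddef>
    #include <cstdio>

    int main() {
        constexpr std::array<unsigned, 4> locations{32, 33, 34, 35};
        for (std::size_t offset = 0; offset < locations.size(); ++offset) {
            unsigned components = 1;
            const unsigned base_index = locations[offset] / 4;
            // Fold every following component of the same attribute into this varying.
            while (offset + 1 < locations.size() && base_index == locations[offset + 1] / 4) {
                ++offset;
                ++components;
            }
            std::printf("location %u -> %u component(s)\n",
                        locations[offset + 1 - components], components);
        }
        // Prints exactly one line: "location 32 -> 4 component(s)".
        return 0;
    }
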
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
deleted file mode 100644
index 77d05f64c..000000000
--- a/src/video_core/shader/transform_feedback.h
+++ /dev/null
@@ -1,23 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/shader/registry.h"
11
12namespace VideoCommon::Shader {
13
14struct VaryingTFB {
15 std::size_t buffer;
16 std::size_t stride;
17 std::size_t offset;
18 std::size_t components;
19};
20
21std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
22
23} // namespace VideoCommon::Shader