83 files changed, 55 insertions(+), 19623 deletions(-)
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 311ba1c2e..77358768e 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -51,61 +51,7 @@ endif()
 # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
 set(VIDEO_CORE "${SRC_DIR}/src/video_core")
 set(HASH_FILES
-    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
-    "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
-    "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
-    "${VIDEO_CORE}/shader/decode/bfe.cpp"
-    "${VIDEO_CORE}/shader/decode/bfi.cpp"
-    "${VIDEO_CORE}/shader/decode/conversion.cpp"
-    "${VIDEO_CORE}/shader/decode/ffma.cpp"
-    "${VIDEO_CORE}/shader/decode/float_set.cpp"
-    "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/half_set.cpp"
-    "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/hfma2.cpp"
-    "${VIDEO_CORE}/shader/decode/image.cpp"
-    "${VIDEO_CORE}/shader/decode/integer_set.cpp"
-    "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/memory.cpp"
-    "${VIDEO_CORE}/shader/decode/texture.cpp"
-    "${VIDEO_CORE}/shader/decode/other.cpp"
-    "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
-    "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
-    "${VIDEO_CORE}/shader/decode/shift.cpp"
-    "${VIDEO_CORE}/shader/decode/video.cpp"
-    "${VIDEO_CORE}/shader/decode/warp.cpp"
-    "${VIDEO_CORE}/shader/decode/xmad.cpp"
-    "${VIDEO_CORE}/shader/ast.cpp"
-    "${VIDEO_CORE}/shader/ast.h"
-    "${VIDEO_CORE}/shader/compiler_settings.cpp"
-    "${VIDEO_CORE}/shader/compiler_settings.h"
-    "${VIDEO_CORE}/shader/control_flow.cpp"
-    "${VIDEO_CORE}/shader/control_flow.h"
-    "${VIDEO_CORE}/shader/decode.cpp"
-    "${VIDEO_CORE}/shader/expr.cpp"
-    "${VIDEO_CORE}/shader/expr.h"
-    "${VIDEO_CORE}/shader/node.h"
-    "${VIDEO_CORE}/shader/node_helper.cpp"
-    "${VIDEO_CORE}/shader/node_helper.h"
-    "${VIDEO_CORE}/shader/registry.cpp"
-    "${VIDEO_CORE}/shader/registry.h"
-    "${VIDEO_CORE}/shader/shader_ir.cpp"
-    "${VIDEO_CORE}/shader/shader_ir.h"
-    "${VIDEO_CORE}/shader/track.cpp"
-    "${VIDEO_CORE}/shader/transform_feedback.cpp"
-    "${VIDEO_CORE}/shader/transform_feedback.h"
+    # ...
 )
 set(COMBINED "")
 foreach (F IN LISTS HASH_FILES)
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
-Subproject commit 8188e3fbbc105591064093440f88081fb957d4f
+Subproject commit 07c4a37bcf41ea50aef6e98236abdfe8089fb4c
diff --git a/externals/sirit b/externals/sirit
-Subproject commit 200310e8faa756b9869dd6dfc902c255246ac74
+Subproject commit a39596358a3a5488c06554c0c15184a6af71e43
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e03fffd8d..c92266a17 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,61 +32,7 @@ add_custom_command(OUTPUT scm_rev.cpp
     DEPENDS
         # WARNING! It was too much work to try and make a common location for this list,
         # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
-        "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
-        "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
-        "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
-        "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
-        "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
-        "${VIDEO_CORE}/shader/decode/bfe.cpp"
-        "${VIDEO_CORE}/shader/decode/bfi.cpp"
-        "${VIDEO_CORE}/shader/decode/conversion.cpp"
-        "${VIDEO_CORE}/shader/decode/ffma.cpp"
-        "${VIDEO_CORE}/shader/decode/float_set.cpp"
-        "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/half_set.cpp"
-        "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/hfma2.cpp"
-        "${VIDEO_CORE}/shader/decode/image.cpp"
-        "${VIDEO_CORE}/shader/decode/integer_set.cpp"
-        "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/memory.cpp"
-        "${VIDEO_CORE}/shader/decode/texture.cpp"
-        "${VIDEO_CORE}/shader/decode/other.cpp"
-        "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
-        "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
-        "${VIDEO_CORE}/shader/decode/shift.cpp"
-        "${VIDEO_CORE}/shader/decode/video.cpp"
-        "${VIDEO_CORE}/shader/decode/warp.cpp"
-        "${VIDEO_CORE}/shader/decode/xmad.cpp"
-        "${VIDEO_CORE}/shader/ast.cpp"
-        "${VIDEO_CORE}/shader/ast.h"
-        "${VIDEO_CORE}/shader/compiler_settings.cpp"
-        "${VIDEO_CORE}/shader/compiler_settings.h"
-        "${VIDEO_CORE}/shader/control_flow.cpp"
-        "${VIDEO_CORE}/shader/control_flow.h"
-        "${VIDEO_CORE}/shader/decode.cpp"
-        "${VIDEO_CORE}/shader/expr.cpp"
-        "${VIDEO_CORE}/shader/expr.h"
-        "${VIDEO_CORE}/shader/node.h"
-        "${VIDEO_CORE}/shader/node_helper.cpp"
-        "${VIDEO_CORE}/shader/node_helper.h"
-        "${VIDEO_CORE}/shader/registry.cpp"
-        "${VIDEO_CORE}/shader/registry.h"
-        "${VIDEO_CORE}/shader/shader_ir.cpp"
-        "${VIDEO_CORE}/shader/shader_ir.h"
-        "${VIDEO_CORE}/shader/track.cpp"
-        "${VIDEO_CORE}/shader/transform_feedback.cpp"
-        "${VIDEO_CORE}/shader/transform_feedback.h"
+        # ...
         # and also check that the scm_rev files haven't changed
         "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
         "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e4de55f4d..c5ce71706 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,7 +29,6 @@ add_library(video_core STATIC
     dirty_flags.h
     dma_pusher.cpp
     dma_pusher.h
-    engines/const_buffer_engine_interface.h
     engines/const_buffer_info.h
     engines/engine_interface.h
     engines/engine_upload.cpp
@@ -61,8 +60,6 @@ add_library(video_core STATIC
     gpu.h
     gpu_thread.cpp
     gpu_thread.h
-    guest_driver.cpp
-    guest_driver.h
     memory_manager.cpp
     memory_manager.h
     query_cache.h
@@ -71,8 +68,6 @@ add_library(video_core STATIC
     rasterizer_interface.h
     renderer_base.cpp
    renderer_base.h
-    renderer_opengl/gl_arb_decompiler.cpp
-    renderer_opengl/gl_arb_decompiler.h
     renderer_opengl/gl_buffer_cache.cpp
     renderer_opengl/gl_buffer_cache.h
     renderer_opengl/gl_device.cpp
@@ -85,10 +80,6 @@ add_library(video_core STATIC
     renderer_opengl/gl_resource_manager.h
     renderer_opengl/gl_shader_cache.cpp
     renderer_opengl/gl_shader_cache.h
-    renderer_opengl/gl_shader_decompiler.cpp
-    renderer_opengl/gl_shader_decompiler.h
-    renderer_opengl/gl_shader_disk_cache.cpp
-    renderer_opengl/gl_shader_disk_cache.h
     renderer_opengl/gl_shader_manager.cpp
     renderer_opengl/gl_shader_manager.h
     renderer_opengl/gl_shader_util.cpp
@@ -128,8 +119,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_descriptor_pool.h
     renderer_vulkan/vk_fence_manager.cpp
     renderer_vulkan/vk_fence_manager.h
-    renderer_vulkan/vk_graphics_pipeline.cpp
-    renderer_vulkan/vk_graphics_pipeline.h
     renderer_vulkan/vk_master_semaphore.cpp
     renderer_vulkan/vk_master_semaphore.h
     renderer_vulkan/vk_pipeline_cache.cpp
@@ -142,8 +131,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_resource_pool.h
     renderer_vulkan/vk_scheduler.cpp
     renderer_vulkan/vk_scheduler.h
-    renderer_vulkan/vk_shader_decompiler.cpp
-    renderer_vulkan/vk_shader_decompiler.h
     renderer_vulkan/vk_shader_util.cpp
     renderer_vulkan/vk_shader_util.h
     renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -159,57 +146,6 @@ add_library(video_core STATIC
     shader_cache.h
     shader_notify.cpp
     shader_notify.h
-    shader/decode/arithmetic.cpp
-    shader/decode/arithmetic_immediate.cpp
-    shader/decode/bfe.cpp
-    shader/decode/bfi.cpp
-    shader/decode/shift.cpp
-    shader/decode/arithmetic_integer.cpp
-    shader/decode/arithmetic_integer_immediate.cpp
-    shader/decode/arithmetic_half.cpp
-    shader/decode/arithmetic_half_immediate.cpp
-    shader/decode/ffma.cpp
-    shader/decode/hfma2.cpp
-    shader/decode/conversion.cpp
-    shader/decode/memory.cpp
-    shader/decode/texture.cpp
-    shader/decode/image.cpp
-    shader/decode/float_set_predicate.cpp
-    shader/decode/integer_set_predicate.cpp
-    shader/decode/half_set_predicate.cpp
-    shader/decode/predicate_set_register.cpp
-    shader/decode/predicate_set_predicate.cpp
-    shader/decode/register_set_predicate.cpp
-    shader/decode/float_set.cpp
-    shader/decode/integer_set.cpp
-    shader/decode/half_set.cpp
-    shader/decode/video.cpp
-    shader/decode/warp.cpp
-    shader/decode/xmad.cpp
-    shader/decode/other.cpp
-    shader/ast.cpp
-    shader/ast.h
-    shader/async_shaders.cpp
-    shader/async_shaders.h
-    shader/compiler_settings.cpp
-    shader/compiler_settings.h
-    shader/control_flow.cpp
-    shader/control_flow.h
-    shader/decode.cpp
-    shader/expr.cpp
-    shader/expr.h
-    shader/memory_util.cpp
-    shader/memory_util.h
-    shader/node_helper.cpp
-    shader/node_helper.h
-    shader/node.h
-    shader/registry.cpp
-    shader/registry.h
-    shader/shader_ir.cpp
-    shader/shader_ir.h
-    shader/track.cpp
-    shader/transform_feedback.cpp
-    shader/transform_feedback.h
     surface.cpp
     surface.h
     texture_cache/accelerated_swizzle.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
deleted file mode 100644
index f46e81bb7..000000000
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <type_traits>
-#include "common/bit_field.h"
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-#include "video_core/textures/texture.h"
-
-namespace Tegra::Engines {
-
-struct SamplerDescriptor {
-    union {
-        u32 raw = 0;
-        BitField<0, 2, Tegra::Shader::TextureType> texture_type;
-        BitField<2, 3, Tegra::Texture::ComponentType> r_type;
-        BitField<5, 1, u32> is_array;
-        BitField<6, 1, u32> is_buffer;
-        BitField<7, 1, u32> is_shadow;
-        BitField<8, 3, Tegra::Texture::ComponentType> g_type;
-        BitField<11, 3, Tegra::Texture::ComponentType> b_type;
-        BitField<14, 3, Tegra::Texture::ComponentType> a_type;
-        BitField<17, 7, Tegra::Texture::TextureFormat> format;
-    };
-
-    bool operator==(const SamplerDescriptor& rhs) const noexcept {
-        return raw == rhs.raw;
-    }
-
-    bool operator!=(const SamplerDescriptor& rhs) const noexcept {
-        return !operator==(rhs);
-    }
-
-    static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
-        using Tegra::Shader::TextureType;
-        SamplerDescriptor result;
-
-        result.format.Assign(tic.format.Value());
-        result.r_type.Assign(tic.r_type.Value());
-        result.g_type.Assign(tic.g_type.Value());
-        result.b_type.Assign(tic.b_type.Value());
-        result.a_type.Assign(tic.a_type.Value());
-
-        switch (tic.texture_type.Value()) {
-        case Tegra::Texture::TextureType::Texture1D:
-            result.texture_type.Assign(TextureType::Texture1D);
-            return result;
-        case Tegra::Texture::TextureType::Texture2D:
-            result.texture_type.Assign(TextureType::Texture2D);
-            return result;
-        case Tegra::Texture::TextureType::Texture3D:
-            result.texture_type.Assign(TextureType::Texture3D);
-            return result;
-        case Tegra::Texture::TextureType::TextureCubemap:
-            result.texture_type.Assign(TextureType::TextureCube);
-            return result;
-        case Tegra::Texture::TextureType::Texture1DArray:
-            result.texture_type.Assign(TextureType::Texture1D);
-            result.is_array.Assign(1);
-            return result;
-        case Tegra::Texture::TextureType::Texture2DArray:
-            result.texture_type.Assign(TextureType::Texture2D);
-            result.is_array.Assign(1);
-            return result;
-        case Tegra::Texture::TextureType::Texture1DBuffer:
-            result.texture_type.Assign(TextureType::Texture1D);
-            result.is_buffer.Assign(1);
-            return result;
-        case Tegra::Texture::TextureType::Texture2DNoMipmap:
-            result.texture_type.Assign(TextureType::Texture2D);
-            return result;
-        case Tegra::Texture::TextureType::TextureCubeArray:
-            result.texture_type.Assign(TextureType::TextureCube);
-            result.is_array.Assign(1);
-            return result;
-        default:
-            result.texture_type.Assign(TextureType::Texture2D);
-            return result;
-        }
-    }
-};
-static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
-
-class ConstBufferEngineInterface {
-public:
-    virtual ~ConstBufferEngineInterface() = default;
-    virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
-    virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
-    virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                                    u64 offset) const = 0;
-    virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
-    virtual u32 GetBoundBuffer() const = 0;
-
-    virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
-    virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
-};
-
-} // namespace Tegra::Engines
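
The removed SamplerDescriptor packs everything the shader pipeline needs to know about a sampler into a single u32 through yuzu's BitField helper, which is why equality is one integer compare. For readers unfamiliar with the pattern, here is a minimal, self-contained sketch of the same bit-packing idea using plain shifts and masks; the PackedSampler type and its Assign/Extract helpers are invented for illustration, only the bit positions mirror the descriptor above:

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative shift/mask equivalent of the BitField layout above:
// bits 0-1 texture_type, bit 5 is_array, bit 6 is_buffer, bit 7 is_shadow,
// bits 17-23 format.
struct PackedSampler {
    std::uint32_t raw = 0;

    // Write 'width' bits of 'value' starting at bit 'pos'.
    void Assign(unsigned pos, unsigned width, std::uint32_t value) {
        const std::uint32_t mask = ((1u << width) - 1u) << pos;
        raw = (raw & ~mask) | ((value << pos) & mask);
    }

    // Read 'width' bits starting at bit 'pos'.
    std::uint32_t Extract(unsigned pos, unsigned width) const {
        return (raw >> pos) & ((1u << width) - 1u);
    }
};

int main() {
    PackedSampler sampler;
    sampler.Assign(0, 2, 1);   // texture_type (hypothetical encoding)
    sampler.Assign(5, 1, 1);   // is_array
    sampler.Assign(17, 7, 42); // format
    // Like SamplerDescriptor::operator==, comparing two descriptors is a
    // single u32 comparison on 'raw'.
    std::printf("raw=0x%08x type=%u array=%u format=%u\n", (unsigned)sampler.raw,
                (unsigned)sampler.Extract(0, 2), (unsigned)sampler.Extract(5, 1),
                (unsigned)sampler.Extract(17, 7));
}
```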
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a9b75091e..cae93c470 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
     }
 }
 
-u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
-    ASSERT(stage == ShaderType::Compute);
-    const auto& buffer = launch_description.const_buffer_config[const_buffer];
-    u32 result;
-    std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
-    return result;
-}
-
-SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
-    return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
-}
-
-SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                                       u64 offset) const {
-    ASSERT(stage == ShaderType::Compute);
-    const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
-    const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
-    return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}
-
-SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
-    const Texture::TextureHandle tex_handle{handle};
-    const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
-    const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
-
-    SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
-    result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
-    return result;
-}
-
-VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
-    return rasterizer->AccessGuestDriverProfile();
-}
-
-const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
-    return rasterizer->AccessGuestDriverProfile();
-}
-
 void KeplerCompute::ProcessLaunch() {
     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
                                    LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
-
-    const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
-    LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
-
-    rasterizer->DispatchCompute(code_addr);
+    rasterizer->DispatchCompute();
 }
 
 Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
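
The hunk above changes the direction of data flow: ProcessLaunch used to resolve the shader address (code segment base plus program_start from the launch description) and push it into the rasterizer, whereas now the rasterizer pulls whatever it needs from the engine state itself. A minimal sketch of the two flows, using hypothetical Engine/LaunchParams stand-ins rather than yuzu's real classes:

```cpp
#include <cstdint>
#include <cstdio>

using GPUVAddr = std::uint64_t;

// Hypothetical stand-ins, only to contrast the two call styles.
struct LaunchParams {
    GPUVAddr program_start = 0x1000;
};

struct Engine {
    GPUVAddr code_loc = 0xA0000000;
    LaunchParams launch_description;
};

// Old flow: the engine resolved the shader address and handed it over.
void DispatchComputeOld(GPUVAddr code_addr) {
    std::printf("old: look up shader at 0x%llx\n", (unsigned long long)code_addr);
}

// New flow: the consumer reads the launch description from the engine itself.
void DispatchComputeNew(const Engine& engine) {
    const GPUVAddr code_addr = engine.code_loc + engine.launch_description.program_start;
    std::printf("new: look up shader at 0x%llx\n", (unsigned long long)code_addr);
}

int main() {
    Engine engine;
    DispatchComputeOld(engine.code_loc + engine.launch_description.program_start);
    DispatchComputeNew(engine);
}
```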
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7c40cba38..0d7683c2d 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,7 +10,6 @@
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/engine_interface.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/engines/shader_type.h"
@@ -40,7 +39,7 @@ namespace Tegra::Engines {
 #define KEPLER_COMPUTE_REG_INDEX(field_name) \
     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
 
-class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
+class KeplerCompute final : public EngineInterface {
 public:
     explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
     ~KeplerCompute();
@@ -209,23 +208,6 @@ public:
     void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
                          u32 methods_pending) override;
 
-    u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
-
-    SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
-
-    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                            u64 offset) const override;
-
-    SamplerDescriptor AccessSampler(u32 handle) const override;
-
-    u32 GetBoundBuffer() const override {
-        return regs.tex_cb_index;
-    }
-
-    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
-
-    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
-
 private:
     void ProcessLaunch();
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index aab6b8f7a..103a51fd0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() {
     rasterizer->Clear();
 }
 
-u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
-    ASSERT(stage != ShaderType::Compute);
-    const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
-    const auto& buffer = shader_stage.const_buffers[const_buffer];
-    return memory_manager.Read<u32>(buffer.address + offset);
-}
-
-SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
-    return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
-}
-
-SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                                   u64 offset) const {
-    ASSERT(stage != ShaderType::Compute);
-    const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
-    const auto& tex_info_buffer = shader.const_buffers[const_buffer];
-    const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
-    return AccessSampler(memory_manager.Read<u32>(tex_info_address));
-}
-
-SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
-    const Texture::TextureHandle tex_handle{handle};
-    const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
-    const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
-
-    SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
-    result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
-    return result;
-}
-
-VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
-    return rasterizer->AccessGuestDriverProfile();
-}
-
-const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
-    return rasterizer->AccessGuestDriverProfile();
-}
-
 } // namespace Tegra::Engines
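
The removed AccessSampler splits a 32-bit texture handle into a TIC (texture image control) index and a TSC (texture sampler control) index through Texture::TextureHandle. A minimal sketch of that decoding, assuming the 20-bit TIC / 12-bit TSC split used by Maxwell handles; the TextureHandle type and method names here are illustrative, not yuzu's exact declarations:

```cpp
#include <cstdint>
#include <cstdio>

// Assumed layout of a Maxwell texture handle: TIC index in the low 20 bits,
// TSC index in the upper 12 bits.
struct TextureHandle {
    std::uint32_t raw;

    std::uint32_t TicId() const { return raw & 0xFFFFFu; }       // bits 0-19
    std::uint32_t TscId() const { return (raw >> 20) & 0xFFFu; } // bits 20-31
};

int main() {
    const TextureHandle handle{(7u << 20) | 123u}; // TSC 7, TIC 123
    std::printf("tic=%u tsc=%u\n", (unsigned)handle.TicId(), (unsigned)handle.TscId());
}
```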
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 335383955..cbf94412b 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -17,7 +17,6 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/const_buffer_info.h"
 #include "video_core/engines/engine_interface.h"
 #include "video_core/engines/engine_upload.h"
@@ -49,7 +48,7 @@ namespace Tegra::Engines {
 #define MAXWELL3D_REG_INDEX(field_name) \
     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
 
-class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
+class Maxwell3D final : public EngineInterface {
 public:
     explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
     ~Maxwell3D();
@@ -1424,23 +1423,6 @@ public:
 
     void FlushMMEInlineDraw();
 
-    u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
-
-    SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
-
-    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
-                                            u64 offset) const override;
-
-    SamplerDescriptor AccessSampler(u32 handle) const override;
-
-    u32 GetBoundBuffer() const override {
-        return regs.tex_cb_index;
-    }
-
-    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
-
-    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
-
     bool ShouldExecute() const {
         return execute_on;
     }
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
deleted file mode 100644
index f058f2744..000000000
--- a/src/video_core/guest_driver.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <limits>
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/guest_driver.h"
-
-namespace VideoCore {
-
-void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
-    if (texture_handler_size) {
-        return;
-    }
-    const std::size_t size = bound_offsets.size();
-    if (size < 2) {
-        return;
-    }
-    std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
-    u32 min_val = std::numeric_limits<u32>::max();
-    for (std::size_t i = 1; i < size; ++i) {
-        if (bound_offsets[i] == bound_offsets[i - 1]) {
-            continue;
-        }
-        const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
-        min_val = std::min(min_val, new_min);
-    }
-    if (min_val > 2) {
-        return;
-    }
-    texture_handler_size = min_texture_handler_size * min_val;
-}
-
-} // namespace VideoCore
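
To make the removed deduction concrete: it sorts the bound handle offsets, takes the smallest non-zero gap between neighbours, and multiplies it by the 4-byte minimum handler size, so offsets {0, 1, 2} yield 4 * 1 = 4 bytes and {0, 2, 4} yield 4 * 2 = 8 bytes. A standalone re-run of the same algorithm for experimentation (the function name and std::optional return are this sketch's, not yuzu's):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <optional>
#include <vector>

// Standalone copy of the deleted deduction logic.
std::optional<std::uint32_t> DeduceHandlerSize(std::vector<std::uint32_t> offsets) {
    if (offsets.size() < 2) {
        return std::nullopt; // one sample carries no stride information
    }
    std::sort(offsets.begin(), offsets.end());
    std::uint32_t min_val = std::numeric_limits<std::uint32_t>::max();
    for (std::size_t i = 1; i < offsets.size(); ++i) {
        if (offsets[i] == offsets[i - 1]) {
            continue; // skip duplicate offsets
        }
        min_val = std::min(min_val, offsets[i] - offsets[i - 1]);
    }
    if (min_val > 2) {
        return std::nullopt; // offsets too sparse to be adjacent handles
    }
    return 4 * min_val; // min_texture_handler_size * smallest stride
}

int main() {
    std::printf("{0,1,2} -> %u bytes\n", (unsigned)*DeduceHandlerSize({0, 1, 2})); // 4
    std::printf("{0,2,4} -> %u bytes\n", (unsigned)*DeduceHandlerSize({0, 2, 4})); // 8
}
```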
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
deleted file mode 100644
index 21e569ba1..000000000
--- a/src/video_core/guest_driver.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <vector>
-
-#include "common/common_types.h"
-
-namespace VideoCore {
-
-/**
- * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
- * information necessary for impossible to avoid HLE methods like shader tracks as they are
- * Entscheidungsproblems.
- */
-class GuestDriverProfile {
-public:
-    explicit GuestDriverProfile() = default;
-    explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
-        : texture_handler_size{texture_handler_size_} {}
-
-    void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
-
-    u32 GetTextureHandlerSize() const {
-        return texture_handler_size.value_or(default_texture_handler_size);
-    }
-
-    bool IsTextureHandlerSizeKnown() const {
-        return texture_handler_size.has_value();
-    }
-
-private:
-    // Minimum size of texture handler any driver can use.
-    static constexpr u32 min_texture_handler_size = 4;
-
-    // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
-    // Thus, certain drivers may squish the size.
-    static constexpr u32 default_texture_handler_size = 8;
-
-    std::optional<u32> texture_handler_size = default_texture_handler_size;
-};
-
-} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 58014c1c3..b094fc064 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -11,7 +11,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
-#include "video_core/guest_driver.h"
 
 namespace Tegra {
 class MemoryManager;
@@ -45,7 +44,7 @@ public:
     virtual void Clear() = 0;
 
     /// Dispatches a compute shader invocation
-    virtual void DispatchCompute(GPUVAddr code_addr) = 0;
+    virtual void DispatchCompute() = 0;
 
     /// Resets the counter of a query
     virtual void ResetCounter(QueryType type) = 0;
@@ -136,18 +135,5 @@ public:
     /// Initialize disk cached resources for the game being emulated
     virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                                    const DiskResourceLoadCallback& callback) {}
-
-    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
-    [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
-        return guest_driver_profile;
-    }
-
-    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
-    [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
-        return guest_driver_profile;
-    }
-
-private:
-    GuestDriverProfile guest_driver_profile{};
 };
 } // namespace VideoCore
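
Every backend implementing VideoCore::RasterizerInterface has to follow the DispatchCompute signature change above. A minimal sketch of what that adaptation looks like, with a hypothetical Backend class standing in for yuzu's actual OpenGL/Vulkan rasterizers (which presumably obtain the program address from the compute engine's launch description instead of a parameter):

```cpp
#include <cstdio>

// Trimmed-down stand-in for the interface after this change.
class RasterizerInterface {
public:
    virtual ~RasterizerInterface() = default;

    /// Dispatches a compute shader invocation
    virtual void DispatchCompute() = 0;
};

class Backend final : public RasterizerInterface {
public:
    // Before: void DispatchCompute(GPUVAddr code_addr) override;
    // After: the override takes no argument and is expected to query the
    // compute engine state for whatever it needs.
    void DispatchCompute() override {
        std::puts("dispatching compute from engine state");
    }
};

int main() {
    Backend backend;
    RasterizerInterface& rasterizer = backend;
    rasterizer.DispatchCompute();
}
```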
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null | |||
| @@ -1,2124 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <variant> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/assert.h" | ||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 20 | #include "video_core/shader/registry.h" | ||
| 21 | #include "video_core/shader/shader_ir.h" | ||
| 22 | |||
| 23 | // Predicates in the decompiled code follow the convention that -1 means true and 0 means false. | ||
| 24 | // GLASM lacks booleans, so they have to be implemented as integers. | ||
| 25 | // Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to | ||
| 26 | // select between two values, because -1 will be evaluated as true and 0 as false. | ||
| 27 | |||
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | namespace { | ||
| 31 | |||
| 32 | using Tegra::Engines::ShaderType; | ||
| 33 | using Tegra::Shader::Attribute; | ||
| 34 | using Tegra::Shader::PixelImap; | ||
| 35 | using Tegra::Shader::Register; | ||
| 36 | using namespace VideoCommon::Shader; | ||
| 37 | using Operation = const OperationNode&; | ||
| 38 | |||
| 39 | constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; | ||
| 40 | |||
| 41 | char Swizzle(std::size_t component) { | ||
| 42 | static constexpr std::string_view SWIZZLE{"xyzw"}; | ||
| 43 | return SWIZZLE.at(component); | ||
| 44 | } | ||
| 45 | |||
| 46 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 47 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 48 | } | ||
| 49 | |||
| 50 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 51 | ASSERT(IsGenericAttribute(index)); | ||
| 52 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string_view Modifiers(Operation operation) { | ||
| 56 | const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); | ||
| 57 | if (meta && meta->precise) { | ||
| 58 | return ".PREC"; | ||
| 59 | } | ||
| 60 | return ""; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view GetInputFlags(PixelImap attribute) { | ||
| 64 | switch (attribute) { | ||
| 65 | case PixelImap::Perspective: | ||
| 66 | return ""; | ||
| 67 | case PixelImap::Constant: | ||
| 68 | return "FLAT "; | ||
| 69 | case PixelImap::ScreenLinear: | ||
| 70 | return "NOPERSPECTIVE "; | ||
| 71 | case PixelImap::Unused: | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); | ||
| 75 | return {}; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string_view ImageType(Tegra::Shader::ImageType image_type) { | ||
| 79 | switch (image_type) { | ||
| 80 | case Tegra::Shader::ImageType::Texture1D: | ||
| 81 | return "1D"; | ||
| 82 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 83 | return "BUFFER"; | ||
| 84 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 85 | return "ARRAY1D"; | ||
| 86 | case Tegra::Shader::ImageType::Texture2D: | ||
| 87 | return "2D"; | ||
| 88 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 89 | return "ARRAY2D"; | ||
| 90 | case Tegra::Shader::ImageType::Texture3D: | ||
| 91 | return "3D"; | ||
| 92 | } | ||
| 93 | UNREACHABLE(); | ||
| 94 | return {}; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::string_view StackName(MetaStackClass stack) { | ||
| 98 | switch (stack) { | ||
| 99 | case MetaStackClass::Ssy: | ||
| 100 | return "SSY"; | ||
| 101 | case MetaStackClass::Pbk: | ||
| 102 | return "PBK"; | ||
| 103 | } | ||
| 104 | UNREACHABLE(); | ||
| 105 | return ""; | ||
| 106 | }; | ||
| 107 | |||
| 108 | std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | ||
| 109 | switch (topology) { | ||
| 110 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | ||
| 111 | return "POINTS"; | ||
| 112 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | ||
| 113 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||
| 114 | return "LINES"; | ||
| 115 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | ||
| 116 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | ||
| 117 | return "LINES_ADJACENCY"; | ||
| 118 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||
| 119 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||
| 120 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||
| 121 | return "TRIANGLES"; | ||
| 122 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||
| 123 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||
| 124 | return "TRIANGLES_ADJACENCY"; | ||
| 125 | default: | ||
| 126 | UNIMPLEMENTED_MSG("topology={}", topology); | ||
| 127 | return "POINTS"; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 132 | switch (topology) { | ||
| 133 | case Tegra::Shader::OutputTopology::PointList: | ||
| 134 | return "POINTS"; | ||
| 135 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 136 | return "LINE_STRIP"; | ||
| 137 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 138 | return "TRIANGLE_STRIP"; | ||
| 139 | default: | ||
| 140 | UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); | ||
| 141 | return "points"; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | std::string_view StageInputName(ShaderType stage) { | ||
| 146 | switch (stage) { | ||
| 147 | case ShaderType::Vertex: | ||
| 148 | case ShaderType::Geometry: | ||
| 149 | return "vertex"; | ||
| 150 | case ShaderType::Fragment: | ||
| 151 | return "fragment"; | ||
| 152 | case ShaderType::Compute: | ||
| 153 | return "invocation"; | ||
| 154 | default: | ||
| 155 | UNREACHABLE(); | ||
| 156 | return ""; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string TextureType(const MetaTexture& meta) { | ||
| 161 | if (meta.sampler.is_buffer) { | ||
| 162 | return "BUFFER"; | ||
| 163 | } | ||
| 164 | std::string type; | ||
| 165 | if (meta.sampler.is_shadow) { | ||
| 166 | type += "SHADOW"; | ||
| 167 | } | ||
| 168 | if (meta.sampler.is_array) { | ||
| 169 | type += "ARRAY"; | ||
| 170 | } | ||
| 171 | type += [&meta] { | ||
| 172 | switch (meta.sampler.type) { | ||
| 173 | case Tegra::Shader::TextureType::Texture1D: | ||
| 174 | return "1D"; | ||
| 175 | case Tegra::Shader::TextureType::Texture2D: | ||
| 176 | return "2D"; | ||
| 177 | case Tegra::Shader::TextureType::Texture3D: | ||
| 178 | return "3D"; | ||
| 179 | case Tegra::Shader::TextureType::TextureCube: | ||
| 180 | return "CUBE"; | ||
| 181 | } | ||
| 182 | UNREACHABLE(); | ||
| 183 | return "2D"; | ||
| 184 | }(); | ||
| 185 | return type; | ||
| 186 | } | ||
| 187 | |||
| 188 | class ARBDecompiler final { | ||
| 189 | public: | ||
| 190 | explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | ||
| 191 | ShaderType stage_, std::string_view identifier); | ||
| 192 | |||
| 193 | std::string Code() const { | ||
| 194 | return shader_source; | ||
| 195 | } | ||
| 196 | |||
| 197 | private: | ||
| 198 | void DefineGlobalMemory(); | ||
| 199 | |||
| 200 | void DeclareHeader(); | ||
| 201 | void DeclareVertex(); | ||
| 202 | void DeclareGeometry(); | ||
| 203 | void DeclareFragment(); | ||
| 204 | void DeclareCompute(); | ||
| 205 | void DeclareInputAttributes(); | ||
| 206 | void DeclareOutputAttributes(); | ||
| 207 | void DeclareLocalMemory(); | ||
| 208 | void DeclareGlobalMemory(); | ||
| 209 | void DeclareConstantBuffers(); | ||
| 210 | void DeclareRegisters(); | ||
| 211 | void DeclareTemporaries(); | ||
| 212 | void DeclarePredicates(); | ||
| 213 | void DeclareInternalFlags(); | ||
| 214 | |||
| 215 | void InitializeVariables(); | ||
| 216 | |||
| 217 | void DecompileAST(); | ||
| 218 | void DecompileBranchMode(); | ||
| 219 | |||
| 220 | void VisitAST(const ASTNode& node); | ||
| 221 | std::string VisitExpression(const Expr& node); | ||
| 222 | |||
| 223 | void VisitBlock(const NodeBlock& bb); | ||
| 224 | |||
| 225 | std::string Visit(const Node& node); | ||
| 226 | |||
| 227 | std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation); | ||
| 228 | std::string BuildAoffi(Operation); | ||
| 229 | std::string GlobalMemoryPointer(const GmemNode& gmem); | ||
| 230 | void Exit(); | ||
| 231 | |||
| 232 | std::string Assign(Operation); | ||
| 233 | std::string Select(Operation); | ||
| 234 | std::string FClamp(Operation); | ||
| 235 | std::string FCastHalf0(Operation); | ||
| 236 | std::string FCastHalf1(Operation); | ||
| 237 | std::string FSqrt(Operation); | ||
| 238 | std::string FSwizzleAdd(Operation); | ||
| 239 | std::string HAdd2(Operation); | ||
| 240 | std::string HMul2(Operation); | ||
| 241 | std::string HFma2(Operation); | ||
| 242 | std::string HAbsolute(Operation); | ||
| 243 | std::string HNegate(Operation); | ||
| 244 | std::string HClamp(Operation); | ||
| 245 | std::string HCastFloat(Operation); | ||
| 246 | std::string HUnpack(Operation); | ||
| 247 | std::string HMergeF32(Operation); | ||
| 248 | std::string HMergeH0(Operation); | ||
| 249 | std::string HMergeH1(Operation); | ||
| 250 | std::string HPack2(Operation); | ||
| 251 | std::string LogicalAssign(Operation); | ||
| 252 | std::string LogicalPick2(Operation); | ||
| 253 | std::string LogicalAnd2(Operation); | ||
| 254 | std::string FloatOrdered(Operation); | ||
| 255 | std::string FloatUnordered(Operation); | ||
| 256 | std::string LogicalAddCarry(Operation); | ||
| 257 | std::string Texture(Operation); | ||
| 258 | std::string TextureGather(Operation); | ||
| 259 | std::string TextureQueryDimensions(Operation); | ||
| 260 | std::string TextureQueryLod(Operation); | ||
| 261 | std::string TexelFetch(Operation); | ||
| 262 | std::string TextureGradient(Operation); | ||
| 263 | std::string ImageLoad(Operation); | ||
| 264 | std::string ImageStore(Operation); | ||
| 265 | std::string Branch(Operation); | ||
| 266 | std::string BranchIndirect(Operation); | ||
| 267 | std::string PushFlowStack(Operation); | ||
| 268 | std::string PopFlowStack(Operation); | ||
| 269 | std::string Exit(Operation); | ||
| 270 | std::string Discard(Operation); | ||
| 271 | std::string EmitVertex(Operation); | ||
| 272 | std::string EndPrimitive(Operation); | ||
| 273 | std::string InvocationId(Operation); | ||
| 274 | std::string YNegate(Operation); | ||
| 275 | std::string ThreadId(Operation); | ||
| 276 | std::string ShuffleIndexed(Operation); | ||
| 277 | std::string Barrier(Operation); | ||
| 278 | std::string MemoryBarrierGroup(Operation); | ||
| 279 | std::string MemoryBarrierGlobal(Operation); | ||
| 280 | |||
| 281 | template <const std::string_view& op> | ||
| 282 | std::string Unary(Operation operation) { | ||
| 283 | std::string temporary = AllocTemporary(); | ||
| 284 | AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); | ||
| 285 | return temporary; | ||
| 286 | } | ||
| 287 | |||
| 288 | template <const std::string_view& op> | ||
| 289 | std::string Binary(Operation operation) { | ||
| 290 | std::string temporary = AllocTemporary(); | ||
| 291 | AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 292 | Visit(operation[1])); | ||
| 293 | return temporary; | ||
| 294 | } | ||
| 295 | |||
| 296 | template <const std::string_view& op> | ||
| 297 | std::string Trinary(Operation operation) { | ||
| 298 | std::string temporary = AllocTemporary(); | ||
| 299 | AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 300 | Visit(operation[1]), Visit(operation[2])); | ||
| 301 | return temporary; | ||
| 302 | } | ||
| 303 | |||
| 304 | template <const std::string_view& op, bool unordered> | ||
| 305 | std::string FloatComparison(Operation operation) { | ||
| 306 | std::string temporary = AllocTemporary(); | ||
| 307 | AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); | ||
| 308 | AddLine("MOV.S {}, 0;", temporary); | ||
| 309 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 310 | |||
| 311 | const std::string op_a = Visit(operation[0]); | ||
| 312 | const std::string op_b = Visit(operation[1]); | ||
| 313 | if constexpr (unordered) { | ||
| 314 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 315 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 316 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 317 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 318 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 319 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 320 | } else if (op == SNE_F) { | ||
| 321 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 322 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 323 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 324 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 325 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 326 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 327 | } | ||
| 328 | return temporary; | ||
| 329 | } | ||
| 330 | |||
| 331 | template <const std::string_view& op, bool is_nan> | ||
| 332 | std::string HalfComparison(Operation operation) { | ||
| 333 | std::string tmp1 = AllocVectorTemporary(); | ||
| 334 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 335 | const std::string op_a = Visit(operation[0]); | ||
| 336 | const std::string op_b = Visit(operation[1]); | ||
| 337 | AddLine("UP2H.F {}, {};", tmp1, op_a); | ||
| 338 | AddLine("UP2H.F {}, {};", tmp2, op_b); | ||
| 339 | AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); | ||
| 340 | AddLine("TRUNC.U.CC RC.xy, {};", tmp1); | ||
| 341 | AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); | ||
| 342 | AddLine("MOV.S {}.x (NE.x), -1;", tmp1); | ||
| 343 | AddLine("MOV.S {}.y (NE.y), -1;", tmp1); | ||
| 344 | if constexpr (is_nan) { | ||
| 345 | AddLine("MOVC.F RC.x, {};", op_a); | ||
| 346 | AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); | ||
| 347 | AddLine("MOVC.F RC.x, {};", op_b); | ||
| 348 | AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); | ||
| 349 | } | ||
| 350 | return tmp1; | ||
| 351 | } | ||
| 352 | |||
| 353 | template <const std::string_view& op, const std::string_view& type> | ||
| 354 | std::string AtomicImage(Operation operation) { | ||
| 355 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 356 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 357 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 358 | const std::size_t num_values = meta.values.size(); | ||
| 359 | |||
| 360 | const std::string coord = AllocVectorTemporary(); | ||
| 361 | const std::string value = AllocVectorTemporary(); | ||
| 362 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 363 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 364 | } | ||
| 365 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 366 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 367 | } | ||
| 368 | |||
| 369 | AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, | ||
| 370 | image_id, ImageType(meta.image.type)); | ||
| 371 | return fmt::format("{}.x", coord); | ||
| 372 | } | ||
| 373 | |||
| 374 | template <const std::string_view& op, const std::string_view& type> | ||
| 375 | std::string Atomic(Operation operation) { | ||
| 376 | std::string temporary = AllocTemporary(); | ||
| 377 | std::string address; | ||
| 378 | std::string_view opname; | ||
| 379 | bool robust = false; | ||
| 380 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 381 | address = GlobalMemoryPointer(*gmem); | ||
| 382 | opname = "ATOM"; | ||
| 383 | robust = true; | ||
| 384 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 385 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||
| 386 | opname = "ATOMS"; | ||
| 387 | } else { | ||
| 388 | UNREACHABLE(); | ||
| 389 | return "{0, 0, 0, 0}"; | ||
| 390 | } | ||
| 391 | if (robust) { | ||
| 392 | AddLine("IF NE.x;"); | ||
| 393 | } | ||
| 394 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||
| 395 | if (robust) { | ||
| 396 | AddLine("ELSE;"); | ||
| 397 | AddLine("MOV.S {}, 0;", temporary); | ||
| 398 | AddLine("ENDIF;"); | ||
| 399 | } | ||
| 400 | return temporary; | ||
| 401 | } | ||
| 402 | |||
| 403 | template <char type> | ||
| 404 | std::string Negate(Operation operation) { | ||
| 405 | std::string temporary = AllocTemporary(); | ||
| 406 | if constexpr (type == 'F') { | ||
| 407 | AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); | ||
| 408 | } else { | ||
| 409 | AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); | ||
| 410 | } | ||
| 411 | return temporary; | ||
| 412 | } | ||
| 413 | |||
| 414 | template <char type> | ||
| 415 | std::string Absolute(Operation operation) { | ||
| 416 | std::string temporary = AllocTemporary(); | ||
| 417 | AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); | ||
| 418 | return temporary; | ||
| 419 | } | ||
| 420 | |||
| 421 | template <char type> | ||
| 422 | std::string BitfieldInsert(Operation operation) { | ||
| 423 | const std::string temporary = AllocVectorTemporary(); | ||
| 424 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); | ||
| 425 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); | ||
| 426 | AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), | ||
| 427 | Visit(operation[0])); | ||
| 428 | return fmt::format("{}.x", temporary); | ||
| 429 | } | ||
| 430 | |||
| 431 | template <char type> | ||
| 432 | std::string BitfieldExtract(Operation operation) { | ||
| 433 | const std::string temporary = AllocVectorTemporary(); | ||
| 434 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); | ||
| 435 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); | ||
| 436 | AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); | ||
| 437 | return fmt::format("{}.x", temporary); | ||
| 438 | } | ||
| 439 | |||
| 440 | template <char swizzle> | ||
| 441 | std::string LocalInvocationId(Operation) { | ||
| 442 | return fmt::format("invocation.localid.{}", swizzle); | ||
| 443 | } | ||
| 444 | |||
| 445 | template <char swizzle> | ||
| 446 | std::string WorkGroupId(Operation) { | ||
| 447 | return fmt::format("invocation.groupid.{}", swizzle); | ||
| 448 | } | ||
| 449 | |||
| 450 | template <char c1, char c2> | ||
| 451 | std::string ThreadMask(Operation) { | ||
| 452 | return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); | ||
| 453 | } | ||
| 454 | |||
| 455 | template <typename... Args> | ||
| 456 | void AddExpression(std::string_view text, Args&&... args) { | ||
| 457 | shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...); | ||
| 458 | } | ||
| 459 | |||
| 460 | template <typename... Args> | ||
| 461 | void AddLine(std::string_view text, Args&&... args) { | ||
| 462 | AddExpression(text, std::forward<Args>(args)...); | ||
| 463 | shader_source += '\n'; | ||
| 464 | } | ||
| 465 | |||
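| | // T registers are plain vector temporaries; L registers are declared as | ||
| | // LONG TEMP (see DeclareTemporaries) and hold 64-bit values such as the | ||
| | // pointers built in GlobalMemoryPointer. | ||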
| 466 | std::string AllocLongVectorTemporary() { | ||
| 467 | max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); | ||
| 468 | return fmt::format("L{}", num_long_temporaries++); | ||
| 469 | } | ||
| 470 | |||
| 471 | std::string AllocLongTemporary() { | ||
| 472 | return fmt::format("{}.x", AllocLongVectorTemporary()); | ||
| 473 | } | ||
| 474 | |||
| 475 | std::string AllocVectorTemporary() { | ||
| 476 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 477 | return fmt::format("T{}", num_temporaries++); | ||
| 478 | } | ||
| 479 | |||
| 480 | std::string AllocTemporary() { | ||
| 481 | return fmt::format("{}.x", AllocVectorTemporary()); | ||
| 482 | } | ||
| 483 | |||
| 484 | void ResetTemporaries() noexcept { | ||
| 485 | num_temporaries = 0; | ||
| 486 | num_long_temporaries = 0; | ||
| 487 | } | ||
| 488 | |||
| 489 | const Device& device; | ||
| 490 | const ShaderIR& ir; | ||
| 491 | const Registry& registry; | ||
| 492 | const ShaderType stage; | ||
| 493 | |||
| 494 | std::size_t num_temporaries = 0; | ||
| 495 | std::size_t max_temporaries = 0; | ||
| 496 | |||
| 497 | std::size_t num_long_temporaries = 0; | ||
| 498 | std::size_t max_long_temporaries = 0; | ||
| 499 | |||
| 500 | std::map<GlobalMemoryBase, u32> global_memory_names; | ||
| 501 | |||
| 502 | std::string shader_source; | ||
| 503 | |||
| 504 | static constexpr std::string_view ADD_F32 = "ADD.F32"; | ||
| 505 | static constexpr std::string_view ADD_S = "ADD.S"; | ||
| 506 | static constexpr std::string_view ADD_U = "ADD.U"; | ||
| 507 | static constexpr std::string_view MUL_F32 = "MUL.F32"; | ||
| 508 | static constexpr std::string_view MUL_S = "MUL.S"; | ||
| 509 | static constexpr std::string_view MUL_U = "MUL.U"; | ||
| 510 | static constexpr std::string_view DIV_F32 = "DIV.F32"; | ||
| 511 | static constexpr std::string_view DIV_S = "DIV.S"; | ||
| 512 | static constexpr std::string_view DIV_U = "DIV.U"; | ||
| 513 | static constexpr std::string_view MAD_F32 = "MAD.F32"; | ||
| 514 | static constexpr std::string_view RSQ_F32 = "RSQ.F32"; | ||
| 515 | static constexpr std::string_view COS_F32 = "COS.F32"; | ||
| 516 | static constexpr std::string_view SIN_F32 = "SIN.F32"; | ||
| 517 | static constexpr std::string_view EX2_F32 = "EX2.F32"; | ||
| 518 | static constexpr std::string_view LG2_F32 = "LG2.F32"; | ||
| 519 | static constexpr std::string_view SLT_F = "SLT.F32"; | ||
| 520 | static constexpr std::string_view SLT_S = "SLT.S"; | ||
| 521 | static constexpr std::string_view SLT_U = "SLT.U"; | ||
| 522 | static constexpr std::string_view SEQ_F = "SEQ.F32"; | ||
| 523 | static constexpr std::string_view SEQ_S = "SEQ.S"; | ||
| 524 | static constexpr std::string_view SEQ_U = "SEQ.U"; | ||
| 525 | static constexpr std::string_view SLE_F = "SLE.F32"; | ||
| 526 | static constexpr std::string_view SLE_S = "SLE.S"; | ||
| 527 | static constexpr std::string_view SLE_U = "SLE.U"; | ||
| 528 | static constexpr std::string_view SGT_F = "SGT.F32"; | ||
| 529 | static constexpr std::string_view SGT_S = "SGT.S"; | ||
| 530 | static constexpr std::string_view SGT_U = "SGT.U"; | ||
| 531 | static constexpr std::string_view SNE_F = "SNE.F32"; | ||
| 532 | static constexpr std::string_view SNE_S = "SNE.S"; | ||
| 533 | static constexpr std::string_view SNE_U = "SNE.U"; | ||
| 534 | static constexpr std::string_view SGE_F = "SGE.F32"; | ||
| 535 | static constexpr std::string_view SGE_S = "SGE.S"; | ||
| 536 | static constexpr std::string_view SGE_U = "SGE.U"; | ||
| 537 | static constexpr std::string_view AND_S = "AND.S"; | ||
| 538 | static constexpr std::string_view AND_U = "AND.U"; | ||
| 539 | static constexpr std::string_view TRUNC_F = "TRUNC.F"; | ||
| 540 | static constexpr std::string_view TRUNC_S = "TRUNC.S"; | ||
| 541 | static constexpr std::string_view TRUNC_U = "TRUNC.U"; | ||
| 542 | static constexpr std::string_view SHL_S = "SHL.S"; | ||
| 543 | static constexpr std::string_view SHL_U = "SHL.U"; | ||
| 544 | static constexpr std::string_view SHR_S = "SHR.S"; | ||
| 545 | static constexpr std::string_view SHR_U = "SHR.U"; | ||
| 546 | static constexpr std::string_view OR_S = "OR.S"; | ||
| 547 | static constexpr std::string_view OR_U = "OR.U"; | ||
| 548 | static constexpr std::string_view XOR_S = "XOR.S"; | ||
| 549 | static constexpr std::string_view XOR_U = "XOR.U"; | ||
| 550 | static constexpr std::string_view NOT_S = "NOT.S"; | ||
| 551 | static constexpr std::string_view NOT_U = "NOT.U"; | ||
| 552 | static constexpr std::string_view BTC_S = "BTC.S"; | ||
| 553 | static constexpr std::string_view BTC_U = "BTC.U"; | ||
| 554 | static constexpr std::string_view BTFM_S = "BTFM.S"; | ||
| 555 | static constexpr std::string_view BTFM_U = "BTFM.U"; | ||
| 556 | static constexpr std::string_view ROUND_F = "ROUND.F"; | ||
| 557 | static constexpr std::string_view CEIL_F = "CEIL.F"; | ||
| 558 | static constexpr std::string_view FLR_F = "FLR.F"; | ||
| 559 | static constexpr std::string_view I2F_S = "I2F.S"; | ||
| 560 | static constexpr std::string_view I2F_U = "I2F.U"; | ||
| 561 | static constexpr std::string_view MIN_F = "MIN.F"; | ||
| 562 | static constexpr std::string_view MIN_S = "MIN.S"; | ||
| 563 | static constexpr std::string_view MIN_U = "MIN.U"; | ||
| 564 | static constexpr std::string_view MAX_F = "MAX.F"; | ||
| 565 | static constexpr std::string_view MAX_S = "MAX.S"; | ||
| 566 | static constexpr std::string_view MAX_U = "MAX.U"; | ||
| 567 | static constexpr std::string_view MOV_U = "MOV.U"; | ||
| 568 | static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; | ||
| 569 | static constexpr std::string_view TGALL_U = "TGALL.U"; | ||
| 570 | static constexpr std::string_view TGANY_U = "TGANY.U"; | ||
| 571 | static constexpr std::string_view TGEQ_U = "TGEQ.U"; | ||
| 572 | static constexpr std::string_view EXCH = "EXCH"; | ||
| 573 | static constexpr std::string_view ADD = "ADD"; | ||
| 574 | static constexpr std::string_view MIN = "MIN"; | ||
| 575 | static constexpr std::string_view MAX = "MAX"; | ||
| 576 | static constexpr std::string_view AND = "AND"; | ||
| 577 | static constexpr std::string_view OR = "OR"; | ||
| 578 | static constexpr std::string_view XOR = "XOR"; | ||
| 579 | static constexpr std::string_view U32 = "U32"; | ||
| 580 | static constexpr std::string_view S32 = "S32"; | ||
| 581 | |||
| 582 | static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); | ||
| 583 | using DecompilerType = std::string (ARBDecompiler::*)(Operation); | ||
| 584 | static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { | ||
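| | // Indexed by OperationCode in Visit(), so entry order must match the enum; | ||
| | // the blank-line groups mirror the operation categories (float, integer, | ||
| | // half, logical, comparisons, textures, images, atomics, control flow). | ||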
| 585 | &ARBDecompiler::Assign, | ||
| 586 | |||
| 587 | &ARBDecompiler::Select, | ||
| 588 | |||
| 589 | &ARBDecompiler::Binary<ADD_F32>, | ||
| 590 | &ARBDecompiler::Binary<MUL_F32>, | ||
| 591 | &ARBDecompiler::Binary<DIV_F32>, | ||
| 592 | &ARBDecompiler::Trinary<MAD_F32>, | ||
| 593 | &ARBDecompiler::Negate<'F'>, | ||
| 594 | &ARBDecompiler::Absolute<'F'>, | ||
| 595 | &ARBDecompiler::FClamp, | ||
| 596 | &ARBDecompiler::FCastHalf0, | ||
| 597 | &ARBDecompiler::FCastHalf1, | ||
| 598 | &ARBDecompiler::Binary<MIN_F>, | ||
| 599 | &ARBDecompiler::Binary<MAX_F>, | ||
| 600 | &ARBDecompiler::Unary<COS_F32>, | ||
| 601 | &ARBDecompiler::Unary<SIN_F32>, | ||
| 602 | &ARBDecompiler::Unary<EX2_F32>, | ||
| 603 | &ARBDecompiler::Unary<LG2_F32>, | ||
| 604 | &ARBDecompiler::Unary<RSQ_F32>, | ||
| 605 | &ARBDecompiler::FSqrt, | ||
| 606 | &ARBDecompiler::Unary<ROUND_F>, | ||
| 607 | &ARBDecompiler::Unary<FLR_F>, | ||
| 608 | &ARBDecompiler::Unary<CEIL_F>, | ||
| 609 | &ARBDecompiler::Unary<TRUNC_F>, | ||
| 610 | &ARBDecompiler::Unary<I2F_S>, | ||
| 611 | &ARBDecompiler::Unary<I2F_U>, | ||
| 612 | &ARBDecompiler::FSwizzleAdd, | ||
| 613 | |||
| 614 | &ARBDecompiler::Binary<ADD_S>, | ||
| 615 | &ARBDecompiler::Binary<MUL_S>, | ||
| 616 | &ARBDecompiler::Binary<DIV_S>, | ||
| 617 | &ARBDecompiler::Negate<'S'>, | ||
| 618 | &ARBDecompiler::Absolute<'S'>, | ||
| 619 | &ARBDecompiler::Binary<MIN_S>, | ||
| 620 | &ARBDecompiler::Binary<MAX_S>, | ||
| 621 | |||
| 622 | &ARBDecompiler::Unary<TRUNC_S>, | ||
| 623 | &ARBDecompiler::Unary<MOV_U>, | ||
| 624 | &ARBDecompiler::Binary<SHL_S>, | ||
| 625 | &ARBDecompiler::Binary<SHR_U>, | ||
| 626 | &ARBDecompiler::Binary<SHR_S>, | ||
| 627 | &ARBDecompiler::Binary<AND_S>, | ||
| 628 | &ARBDecompiler::Binary<OR_S>, | ||
| 629 | &ARBDecompiler::Binary<XOR_S>, | ||
| 630 | &ARBDecompiler::Unary<NOT_S>, | ||
| 631 | &ARBDecompiler::BitfieldInsert<'S'>, | ||
| 632 | &ARBDecompiler::BitfieldExtract<'S'>, | ||
| 633 | &ARBDecompiler::Unary<BTC_S>, | ||
| 634 | &ARBDecompiler::Unary<BTFM_S>, | ||
| 635 | |||
| 636 | &ARBDecompiler::Binary<ADD_U>, | ||
| 637 | &ARBDecompiler::Binary<MUL_U>, | ||
| 638 | &ARBDecompiler::Binary<DIV_U>, | ||
| 639 | &ARBDecompiler::Binary<MIN_U>, | ||
| 640 | &ARBDecompiler::Binary<MAX_U>, | ||
| 641 | &ARBDecompiler::Unary<TRUNC_U>, | ||
| 642 | &ARBDecompiler::Unary<MOV_U>, | ||
| 643 | &ARBDecompiler::Binary<SHL_U>, | ||
| 644 | &ARBDecompiler::Binary<SHR_U>, | ||
| 645 | &ARBDecompiler::Binary<SHR_U>, | ||
| 646 | &ARBDecompiler::Binary<AND_U>, | ||
| 647 | &ARBDecompiler::Binary<OR_U>, | ||
| 648 | &ARBDecompiler::Binary<XOR_U>, | ||
| 649 | &ARBDecompiler::Unary<NOT_U>, | ||
| 650 | &ARBDecompiler::BitfieldInsert<'U'>, | ||
| 651 | &ARBDecompiler::BitfieldExtract<'U'>, | ||
| 652 | &ARBDecompiler::Unary<BTC_U>, | ||
| 653 | &ARBDecompiler::Unary<BTFM_U>, | ||
| 654 | |||
| 655 | &ARBDecompiler::HAdd2, | ||
| 656 | &ARBDecompiler::HMul2, | ||
| 657 | &ARBDecompiler::HFma2, | ||
| 658 | &ARBDecompiler::HAbsolute, | ||
| 659 | &ARBDecompiler::HNegate, | ||
| 660 | &ARBDecompiler::HClamp, | ||
| 661 | &ARBDecompiler::HCastFloat, | ||
| 662 | &ARBDecompiler::HUnpack, | ||
| 663 | &ARBDecompiler::HMergeF32, | ||
| 664 | &ARBDecompiler::HMergeH0, | ||
| 665 | &ARBDecompiler::HMergeH1, | ||
| 666 | &ARBDecompiler::HPack2, | ||
| 667 | |||
| 668 | &ARBDecompiler::LogicalAssign, | ||
| 669 | &ARBDecompiler::Binary<AND_U>, | ||
| 670 | &ARBDecompiler::Binary<OR_U>, | ||
| 671 | &ARBDecompiler::Binary<XOR_U>, | ||
| 672 | &ARBDecompiler::Unary<NOT_U>, | ||
| 673 | &ARBDecompiler::LogicalPick2, | ||
| 674 | &ARBDecompiler::LogicalAnd2, | ||
| 675 | |||
| 676 | &ARBDecompiler::FloatComparison<SLT_F, false>, | ||
| 677 | &ARBDecompiler::FloatComparison<SEQ_F, false>, | ||
| 678 | &ARBDecompiler::FloatComparison<SLE_F, false>, | ||
| 679 | &ARBDecompiler::FloatComparison<SGT_F, false>, | ||
| 680 | &ARBDecompiler::FloatComparison<SNE_F, false>, | ||
| 681 | &ARBDecompiler::FloatComparison<SGE_F, false>, | ||
| 682 | &ARBDecompiler::FloatOrdered, | ||
| 683 | &ARBDecompiler::FloatUnordered, | ||
| 684 | &ARBDecompiler::FloatComparison<SLT_F, true>, | ||
| 685 | &ARBDecompiler::FloatComparison<SEQ_F, true>, | ||
| 686 | &ARBDecompiler::FloatComparison<SLE_F, true>, | ||
| 687 | &ARBDecompiler::FloatComparison<SGT_F, true>, | ||
| 688 | &ARBDecompiler::FloatComparison<SNE_F, true>, | ||
| 689 | &ARBDecompiler::FloatComparison<SGE_F, true>, | ||
| 690 | |||
| 691 | &ARBDecompiler::Binary<SLT_S>, | ||
| 692 | &ARBDecompiler::Binary<SEQ_S>, | ||
| 693 | &ARBDecompiler::Binary<SLE_S>, | ||
| 694 | &ARBDecompiler::Binary<SGT_S>, | ||
| 695 | &ARBDecompiler::Binary<SNE_S>, | ||
| 696 | &ARBDecompiler::Binary<SGE_S>, | ||
| 697 | |||
| 698 | &ARBDecompiler::Binary<SLT_U>, | ||
| 699 | &ARBDecompiler::Binary<SEQ_U>, | ||
| 700 | &ARBDecompiler::Binary<SLE_U>, | ||
| 701 | &ARBDecompiler::Binary<SGT_U>, | ||
| 702 | &ARBDecompiler::Binary<SNE_U>, | ||
| 703 | &ARBDecompiler::Binary<SGE_U>, | ||
| 704 | |||
| 705 | &ARBDecompiler::LogicalAddCarry, | ||
| 706 | |||
| 707 | &ARBDecompiler::HalfComparison<SLT_F, false>, | ||
| 708 | &ARBDecompiler::HalfComparison<SEQ_F, false>, | ||
| 709 | &ARBDecompiler::HalfComparison<SLE_F, false>, | ||
| 710 | &ARBDecompiler::HalfComparison<SGT_F, false>, | ||
| 711 | &ARBDecompiler::HalfComparison<SNE_F, false>, | ||
| 712 | &ARBDecompiler::HalfComparison<SGE_F, false>, | ||
| 713 | &ARBDecompiler::HalfComparison<SLT_F, true>, | ||
| 714 | &ARBDecompiler::HalfComparison<SEQ_F, true>, | ||
| 715 | &ARBDecompiler::HalfComparison<SLE_F, true>, | ||
| 716 | &ARBDecompiler::HalfComparison<SGT_F, true>, | ||
| 717 | &ARBDecompiler::HalfComparison<SNE_F, true>, | ||
| 718 | &ARBDecompiler::HalfComparison<SGE_F, true>, | ||
| 719 | |||
| 720 | &ARBDecompiler::Texture, | ||
| 721 | &ARBDecompiler::Texture, | ||
| 722 | &ARBDecompiler::TextureGather, | ||
| 723 | &ARBDecompiler::TextureQueryDimensions, | ||
| 724 | &ARBDecompiler::TextureQueryLod, | ||
| 725 | &ARBDecompiler::TexelFetch, | ||
| 726 | &ARBDecompiler::TextureGradient, | ||
| 727 | |||
| 728 | &ARBDecompiler::ImageLoad, | ||
| 729 | &ARBDecompiler::ImageStore, | ||
| 730 | |||
| 731 | &ARBDecompiler::AtomicImage<ADD, U32>, | ||
| 732 | &ARBDecompiler::AtomicImage<AND, U32>, | ||
| 733 | &ARBDecompiler::AtomicImage<OR, U32>, | ||
| 734 | &ARBDecompiler::AtomicImage<XOR, U32>, | ||
| 735 | &ARBDecompiler::AtomicImage<EXCH, U32>, | ||
| 736 | |||
| 737 | &ARBDecompiler::Atomic<EXCH, U32>, | ||
| 738 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 739 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 740 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 741 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 742 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 743 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 744 | |||
| 745 | &ARBDecompiler::Atomic<EXCH, S32>, | ||
| 746 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 747 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 748 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 749 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 750 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 751 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 752 | |||
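| | // The repeated entries below are presumably the reduction opcodes, which | ||
| | // reuse the atomic handlers and simply ignore the returned value. | ||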
| 753 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 754 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 755 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 756 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 757 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 758 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 759 | |||
| 760 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 761 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 762 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 763 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 764 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 765 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 766 | |||
| 767 | &ARBDecompiler::Branch, | ||
| 768 | &ARBDecompiler::BranchIndirect, | ||
| 769 | &ARBDecompiler::PushFlowStack, | ||
| 770 | &ARBDecompiler::PopFlowStack, | ||
| 771 | &ARBDecompiler::Exit, | ||
| 772 | &ARBDecompiler::Discard, | ||
| 773 | |||
| 774 | &ARBDecompiler::EmitVertex, | ||
| 775 | &ARBDecompiler::EndPrimitive, | ||
| 776 | |||
| 777 | &ARBDecompiler::InvocationId, | ||
| 778 | &ARBDecompiler::YNegate, | ||
| 779 | &ARBDecompiler::LocalInvocationId<'x'>, | ||
| 780 | &ARBDecompiler::LocalInvocationId<'y'>, | ||
| 781 | &ARBDecompiler::LocalInvocationId<'z'>, | ||
| 782 | &ARBDecompiler::WorkGroupId<'x'>, | ||
| 783 | &ARBDecompiler::WorkGroupId<'y'>, | ||
| 784 | &ARBDecompiler::WorkGroupId<'z'>, | ||
| 785 | |||
| 786 | &ARBDecompiler::Unary<TGBALLOT_U>, | ||
| 787 | &ARBDecompiler::Unary<TGALL_U>, | ||
| 788 | &ARBDecompiler::Unary<TGANY_U>, | ||
| 789 | &ARBDecompiler::Unary<TGEQ_U>, | ||
| 790 | |||
| 791 | &ARBDecompiler::ThreadId, | ||
| 792 | &ARBDecompiler::ThreadMask<'e', 'q'>, | ||
| 793 | &ARBDecompiler::ThreadMask<'g', 'e'>, | ||
| 794 | &ARBDecompiler::ThreadMask<'g', 't'>, | ||
| 795 | &ARBDecompiler::ThreadMask<'l', 'e'>, | ||
| 796 | &ARBDecompiler::ThreadMask<'l', 't'>, | ||
| 797 | &ARBDecompiler::ShuffleIndexed, | ||
| 798 | |||
| 799 | &ARBDecompiler::Barrier, | ||
| 800 | &ARBDecompiler::MemoryBarrierGroup, | ||
| 801 | &ARBDecompiler::MemoryBarrierGlobal, | ||
| 802 | }; | ||
| 803 | }; | ||
| 804 | |||
| 805 | ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | ||
| 806 | ShaderType stage_, std::string_view identifier) | ||
| 807 | : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { | ||
| 808 | DefineGlobalMemory(); | ||
| 809 | |||
| 810 | AddLine("TEMP RC;"); | ||
| 811 | AddLine("TEMP FSWZA[4];"); | ||
| 812 | AddLine("TEMP FSWZB[4];"); | ||
| 813 | if (ir.IsDecompiled()) { | ||
| 814 | DecompileAST(); | ||
| 815 | } else { | ||
| 816 | DecompileBranchMode(); | ||
| 817 | } | ||
| 818 | AddLine("END"); | ||
| 819 | |||
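| | // The body is generated first so the temporary and register requirements | ||
| | // are known; the declarations are then emitted and the body appended after | ||
| | // them. | ||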
| 820 | const std::string code = std::move(shader_source); | ||
| 821 | DeclareHeader(); | ||
| 822 | DeclareVertex(); | ||
| 823 | DeclareGeometry(); | ||
| 824 | DeclareFragment(); | ||
| 825 | DeclareCompute(); | ||
| 826 | DeclareInputAttributes(); | ||
| 827 | DeclareOutputAttributes(); | ||
| 828 | DeclareLocalMemory(); | ||
| 829 | DeclareGlobalMemory(); | ||
| 830 | DeclareConstantBuffers(); | ||
| 831 | DeclareRegisters(); | ||
| 832 | DeclareTemporaries(); | ||
| 833 | DeclarePredicates(); | ||
| 834 | DeclareInternalFlags(); | ||
| 835 | |||
| 836 | shader_source += code; | ||
| 837 | } | ||
| 838 | |||
| 839 | std::string_view HeaderStageName(ShaderType stage) { | ||
| 840 | switch (stage) { | ||
| 841 | case ShaderType::Vertex: | ||
| 842 | return "vp"; | ||
| 843 | case ShaderType::Geometry: | ||
| 844 | return "gp"; | ||
| 845 | case ShaderType::Fragment: | ||
| 846 | return "fp"; | ||
| 847 | case ShaderType::Compute: | ||
| 848 | return "cp"; | ||
| 849 | default: | ||
| 850 | UNREACHABLE(); | ||
| 851 | return ""; | ||
| 852 | } | ||
| 853 | } | ||
| 854 | |||
| 855 | void ARBDecompiler::DefineGlobalMemory() { | ||
| 856 | u32 binding = 0; | ||
| 857 | for (const auto& pair : ir.GetGlobalMemory()) { | ||
| 858 | const GlobalMemoryBase base = pair.first; | ||
| 859 | global_memory_names.emplace(base, binding); | ||
| 860 | ++binding; | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
| 864 | void ARBDecompiler::DeclareHeader() { | ||
| 865 | AddLine("!!NV{}5.0", HeaderStageName(stage)); | ||
| 866 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D | ||
| 867 | AddLine("OPTION NV_internal;"); | ||
| 868 | AddLine("OPTION NV_gpu_program_fp64;"); | ||
| 869 | AddLine("OPTION NV_shader_thread_group;"); | ||
| 870 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { | ||
| 871 | AddLine("OPTION NV_shader_thread_shuffle;"); | ||
| 872 | } | ||
| 873 | if (stage == ShaderType::Vertex) { | ||
| 874 | if (device.HasNvViewportArray2()) { | ||
| 875 | AddLine("OPTION NV_viewport_array2;"); | ||
| 876 | } | ||
| 877 | } | ||
| 878 | if (stage == ShaderType::Fragment) { | ||
| 879 | AddLine("OPTION ARB_draw_buffers;"); | ||
| 880 | } | ||
| 881 | if (device.HasImageLoadFormatted()) { | ||
| 882 | AddLine("OPTION EXT_shader_image_load_formatted;"); | ||
| 883 | } | ||
| 884 | } | ||
| 885 | |||
| 886 | void ARBDecompiler::DeclareVertex() { | ||
| 887 | if (stage != ShaderType::Vertex) { | ||
| 888 | return; | ||
| 889 | } | ||
| 890 | AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); | ||
| 891 | } | ||
| 892 | |||
| 893 | void ARBDecompiler::DeclareGeometry() { | ||
| 894 | if (stage != ShaderType::Geometry) { | ||
| 895 | return; | ||
| 896 | } | ||
| 897 | const auto& info = registry.GetGraphicsInfo(); | ||
| 898 | const auto& header = ir.GetHeader(); | ||
| 899 | AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); | ||
| 900 | AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); | ||
| 901 | AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); | ||
| 902 | AddLine("ATTRIB vertex_position = vertex.position;"); | ||
| 903 | } | ||
| 904 | |||
| 905 | void ARBDecompiler::DeclareFragment() { | ||
| 906 | if (stage != ShaderType::Fragment) { | ||
| 907 | return; | ||
| 908 | } | ||
| 909 | AddLine("OUTPUT result_color7 = result.color[7];"); | ||
| 910 | AddLine("OUTPUT result_color6 = result.color[6];"); | ||
| 911 | AddLine("OUTPUT result_color5 = result.color[5];"); | ||
| 912 | AddLine("OUTPUT result_color4 = result.color[4];"); | ||
| 913 | AddLine("OUTPUT result_color3 = result.color[3];"); | ||
| 914 | AddLine("OUTPUT result_color2 = result.color[2];"); | ||
| 915 | AddLine("OUTPUT result_color1 = result.color[1];"); | ||
| 916 | AddLine("OUTPUT result_color0 = result.color;"); | ||
| 917 | } | ||
| 918 | |||
| 919 | void ARBDecompiler::DeclareCompute() { | ||
| 920 | if (stage != ShaderType::Compute) { | ||
| 921 | return; | ||
| 922 | } | ||
| 923 | const ComputeInfo& info = registry.GetComputeInfo(); | ||
| 924 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | ||
| 925 | info.workgroup_size[2]); | ||
| 926 | if (info.shared_memory_size_in_words == 0) { | ||
| 927 | return; | ||
| 928 | } | ||
| 929 | const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||
| 930 | u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||
| 931 | if (size_in_bytes > limit) { | ||
| 932 | LOG_ERROR(Render_OpenGL, "Shared memory size {} exceeds host's limit {}; clamping", | ||
| 933 | size_in_bytes, limit); | ||
| 934 | size_in_bytes = limit; | ||
| 935 | } | ||
| 936 | |||
| 937 | AddLine("SHARED_MEMORY {};", size_in_bytes); | ||
| 938 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||
| 939 | } | ||
| 940 | |||
| 941 | void ARBDecompiler::DeclareInputAttributes() { | ||
| 942 | if (stage == ShaderType::Compute) { | ||
| 943 | return; | ||
| 944 | } | ||
| 945 | const std::string_view stage_name = StageInputName(stage); | ||
| 946 | for (const auto attribute : ir.GetInputAttributes()) { | ||
| 947 | if (!IsGenericAttribute(attribute)) { | ||
| 948 | continue; | ||
| 949 | } | ||
| 950 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 951 | |||
| 952 | std::string_view suffix; | ||
| 953 | if (stage == ShaderType::Fragment) { | ||
| 954 | const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; | ||
| 955 | if (input_mode == PixelImap::Unused) { | ||
| 956 | return; | ||
| 957 | } | ||
| 958 | suffix = GetInputFlags(input_mode); | ||
| 959 | } | ||
| 960 | AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, | ||
| 961 | index); | ||
| 962 | } | ||
| 963 | } | ||
| 964 | |||
| 965 | void ARBDecompiler::DeclareOutputAttributes() { | ||
| 966 | if (stage == ShaderType::Compute) { | ||
| 967 | return; | ||
| 968 | } | ||
| 969 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 970 | if (!IsGenericAttribute(attribute)) { | ||
| 971 | continue; | ||
| 972 | } | ||
| 973 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 974 | AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); | ||
| 975 | } | ||
| 976 | } | ||
| 977 | |||
| 978 | void ARBDecompiler::DeclareLocalMemory() { | ||
| 979 | u64 size = 0; | ||
| 980 | if (stage == ShaderType::Compute) { | ||
| 981 | size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 982 | } else { | ||
| 983 | size = ir.GetHeader().GetLocalMemorySize(); | ||
| 984 | } | ||
| 985 | if (size == 0) { | ||
| 986 | return; | ||
| 987 | } | ||
| 988 | const u64 element_count = Common::AlignUp(size, 4) / 4; | ||
| 989 | AddLine("TEMP lmem[{}];", element_count); | ||
| 990 | } | ||
| 991 | |||
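| | // Each global memory region gets one program.local parameter holding its | ||
| | // address in .xy and its length in .z, consumed by GlobalMemoryPointer. | ||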
| 992 | void ARBDecompiler::DeclareGlobalMemory() { | ||
| 993 | const size_t num_entries = ir.GetGlobalMemory().size(); | ||
| 994 | if (num_entries > 0) { | ||
| 995 | AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); | ||
| 996 | } | ||
| 997 | } | ||
| 998 | |||
| 999 | void ARBDecompiler::DeclareConstantBuffers() { | ||
| 1000 | u32 binding = 0; | ||
| 1001 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 1002 | AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); | ||
| 1003 | ++binding; | ||
| 1004 | } | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | void ARBDecompiler::DeclareRegisters() { | ||
| 1008 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1009 | AddLine("TEMP R{};", gpr); | ||
| 1010 | } | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | void ARBDecompiler::DeclareTemporaries() { | ||
| 1014 | for (std::size_t i = 0; i < max_temporaries; ++i) { | ||
| 1015 | AddLine("TEMP T{};", i); | ||
| 1016 | } | ||
| 1017 | for (std::size_t i = 0; i < max_long_temporaries; ++i) { | ||
| 1018 | AddLine("LONG TEMP L{};", i); | ||
| 1019 | } | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | void ARBDecompiler::DeclarePredicates() { | ||
| 1023 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1024 | AddLine("TEMP P{};", static_cast<u64>(pred)); | ||
| 1025 | } | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | void ARBDecompiler::DeclareInternalFlags() { | ||
| 1029 | for (const char* name : INTERNAL_FLAG_NAMES) { | ||
| 1030 | AddLine("TEMP {};", name); | ||
| 1031 | } | ||
| 1032 | } | ||
| 1033 | |||
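| | // FSWZA/FSWZB hold the per-code sign factors consumed by FSwizzleAdd. | ||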
| 1034 | void ARBDecompiler::InitializeVariables() { | ||
| 1035 | AddLine("MOV.F32 FSWZA[0], -1;"); | ||
| 1036 | AddLine("MOV.F32 FSWZA[1], 1;"); | ||
| 1037 | AddLine("MOV.F32 FSWZA[2], -1;"); | ||
| 1038 | AddLine("MOV.F32 FSWZA[3], 0;"); | ||
| 1039 | AddLine("MOV.F32 FSWZB[0], -1;"); | ||
| 1040 | AddLine("MOV.F32 FSWZB[1], -1;"); | ||
| 1041 | AddLine("MOV.F32 FSWZB[2], 1;"); | ||
| 1042 | AddLine("MOV.F32 FSWZB[3], -1;"); | ||
| 1043 | |||
| 1044 | if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { | ||
| 1045 | AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); | ||
| 1046 | } | ||
| 1047 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 1048 | if (!IsGenericAttribute(attribute)) { | ||
| 1049 | continue; | ||
| 1050 | } | ||
| 1051 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 1052 | AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); | ||
| 1053 | } | ||
| 1054 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1055 | AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); | ||
| 1056 | } | ||
| 1057 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1058 | AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); | ||
| 1059 | } | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | void ARBDecompiler::DecompileAST() { | ||
| 1063 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 1064 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1065 | AddLine("TEMP F{};", i); | ||
| 1066 | } | ||
| 1067 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1068 | AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | InitializeVariables(); | ||
| 1072 | |||
| 1073 | VisitAST(ir.GetASTProgram()); | ||
| 1074 | } | ||
| 1075 | |||
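| | // Branch mode emulates arbitrary control flow with a software PC: every | ||
| | // basic block is guarded by an IF on PC inside a REP loop, fallthrough | ||
| | // blocks update PC and CONT, and SSY/PBK model the hardware sync/break | ||
| | // flow stacks. | ||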
| 1076 | void ARBDecompiler::DecompileBranchMode() { | ||
| 1077 | static constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 1078 | if (!ir.IsFlowStackDisabled()) { | ||
| 1079 | AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); | ||
| 1080 | AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); | ||
| 1081 | AddLine("TEMP SSY_TOP;"); | ||
| 1082 | AddLine("TEMP PBK_TOP;"); | ||
| 1083 | } | ||
| 1084 | |||
| 1085 | AddLine("TEMP PC;"); | ||
| 1086 | |||
| 1087 | if (!ir.IsFlowStackDisabled()) { | ||
| 1088 | AddLine("MOV.U SSY_TOP.x, 0;"); | ||
| 1089 | AddLine("MOV.U PBK_TOP.x, 0;"); | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | InitializeVariables(); | ||
| 1093 | |||
| 1094 | const auto basic_block_end = ir.GetBasicBlocks().end(); | ||
| 1095 | auto basic_block_it = ir.GetBasicBlocks().begin(); | ||
| 1096 | const u32 first_address = basic_block_it->first; | ||
| 1097 | AddLine("MOV.U PC.x, {};", first_address); | ||
| 1098 | |||
| 1099 | AddLine("REP;"); | ||
| 1100 | |||
| 1101 | std::size_t num_blocks = 0; | ||
| 1102 | while (basic_block_it != basic_block_end) { | ||
| 1103 | const auto& [address, bb] = *basic_block_it; | ||
| 1104 | ++num_blocks; | ||
| 1105 | |||
| 1106 | AddLine("SEQ.S.CC RC.x, PC.x, {};", address); | ||
| 1107 | AddLine("IF NE.x;"); | ||
| 1108 | |||
| 1109 | VisitBlock(bb); | ||
| 1110 | |||
| 1111 | ++basic_block_it; | ||
| 1112 | |||
| 1113 | if (basic_block_it != basic_block_end) { | ||
| 1114 | const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); | ||
| 1115 | if (!op || op->GetCode() != OperationCode::Branch) { | ||
| 1116 | const u32 next_address = basic_block_it->first; | ||
| 1117 | AddLine("MOV.U PC.x, {};", next_address); | ||
| 1118 | AddLine("CONT;"); | ||
| 1119 | } | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | AddLine("ELSE;"); | ||
| 1123 | } | ||
| 1124 | AddLine("RET;"); | ||
| 1125 | while (num_blocks--) { | ||
| 1126 | AddLine("ENDIF;"); | ||
| 1127 | } | ||
| 1128 | |||
| 1129 | AddLine("ENDREP;"); | ||
| 1130 | } | ||
| 1131 | |||
| 1132 | void ARBDecompiler::VisitAST(const ASTNode& node) { | ||
| 1133 | if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { | ||
| 1134 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1135 | VisitAST(current); | ||
| 1136 | } | ||
| 1137 | } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) { | ||
| 1138 | const std::string condition = VisitExpression(if_then->condition); | ||
| 1139 | ResetTemporaries(); | ||
| 1140 | |||
| 1141 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1142 | AddLine("IF NE.x;"); | ||
| 1143 | for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1144 | VisitAST(current); | ||
| 1145 | } | ||
| 1146 | AddLine("ENDIF;"); | ||
| 1147 | } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) { | ||
| 1148 | AddLine("ELSE;"); | ||
| 1149 | for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1150 | VisitAST(current); | ||
| 1151 | } | ||
| 1152 | } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { | ||
| 1153 | VisitBlock(decoded->nodes); | ||
| 1154 | } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) { | ||
| 1155 | AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); | ||
| 1156 | ResetTemporaries(); | ||
| 1157 | } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { | ||
| 1158 | const std::string condition = VisitExpression(do_while->condition); | ||
| 1159 | ResetTemporaries(); | ||
| 1160 | AddLine("REP;"); | ||
| 1161 | for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1162 | VisitAST(current); | ||
| 1163 | } | ||
| 1164 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1165 | AddLine("BRK (NE.x);"); | ||
| 1166 | AddLine("ENDREP;"); | ||
| 1167 | } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) { | ||
| 1168 | const bool is_true = ExprIsTrue(ast_return->condition); | ||
| 1169 | if (!is_true) { | ||
| 1170 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); | ||
| 1171 | AddLine("IF NE.x;"); | ||
| 1172 | ResetTemporaries(); | ||
| 1173 | } | ||
| 1174 | if (ast_return->kills) { | ||
| 1175 | AddLine("KIL TR;"); | ||
| 1176 | } else { | ||
| 1177 | Exit(); | ||
| 1178 | } | ||
| 1179 | if (!is_true) { | ||
| 1180 | AddLine("ENDIF;"); | ||
| 1181 | } | ||
| 1182 | } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) { | ||
| 1183 | if (ExprIsTrue(ast_break->condition)) { | ||
| 1184 | AddLine("BRK;"); | ||
| 1185 | } else { | ||
| 1186 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); | ||
| 1187 | AddLine("BRK (NE.x);"); | ||
| 1188 | ResetTemporaries(); | ||
| 1189 | } | ||
| 1190 | } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { | ||
| 1191 | // Nothing to do | ||
| 1192 | } else { | ||
| 1193 | UNREACHABLE(); | ||
| 1194 | } | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | std::string ARBDecompiler::VisitExpression(const Expr& node) { | ||
| 1198 | if (const auto expr = std::get_if<ExprAnd>(&*node)) { | ||
| 1199 | std::string result = AllocTemporary(); | ||
| 1200 | AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1201 | VisitExpression(expr->operand2)); | ||
| 1202 | return result; | ||
| 1203 | } | ||
| 1204 | if (const auto expr = std::get_if<ExprOr>(&*node)) { | ||
| 1205 | std::string result = AllocTemporary(); | ||
| 1206 | AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1207 | VisitExpression(expr->operand2)); | ||
| 1208 | return result; | ||
| 1209 | } | ||
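| | // Booleans are encoded as 0 / -1 (0xffffffff); CMP.S d, a, b, c yields b | ||
| | // when a < 0 and c otherwise, so this implements logical NOT. | ||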
| 1210 | if (const auto expr = std::get_if<ExprNot>(&*node)) { | ||
| 1211 | std::string result = AllocTemporary(); | ||
| 1212 | AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); | ||
| 1213 | return result; | ||
| 1214 | } | ||
| 1215 | if (const auto expr = std::get_if<ExprPredicate>(&*node)) { | ||
| 1216 | return fmt::format("P{}.x", static_cast<u64>(expr->predicate)); | ||
| 1217 | } | ||
| 1218 | if (const auto expr = std::get_if<ExprCondCode>(&*node)) { | ||
| 1219 | return Visit(ir.GetConditionCode(expr->cc)); | ||
| 1220 | } | ||
| 1221 | if (const auto expr = std::get_if<ExprVar>(&*node)) { | ||
| 1222 | return fmt::format("F{}.x", expr->var_index); | ||
| 1223 | } | ||
| 1224 | if (const auto expr = std::get_if<ExprBoolean>(&*node)) { | ||
| 1225 | return expr->value ? "0xffffffff" : "0"; | ||
| 1226 | } | ||
| 1227 | if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { | ||
| 1228 | std::string result = AllocTemporary(); | ||
| 1229 | AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); | ||
| 1230 | return result; | ||
| 1231 | } | ||
| 1232 | UNREACHABLE(); | ||
| 1233 | return "0"; | ||
| 1234 | } | ||
| 1235 | |||
| 1236 | void ARBDecompiler::VisitBlock(const NodeBlock& bb) { | ||
| 1237 | for (const auto& node : bb) { | ||
| 1238 | Visit(node); | ||
| 1239 | } | ||
| 1240 | } | ||
| 1241 | |||
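| | // Emits the code for a node and returns the GLASM expression (usually a | ||
| | // scalar .x component) that holds its value. | ||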
| 1242 | std::string ARBDecompiler::Visit(const Node& node) { | ||
| 1243 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 1244 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 1245 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1246 | } | ||
| 1247 | const std::size_t index = static_cast<std::size_t>(operation->GetCode()); | ||
| 1248 | if (index >= OPERATION_DECOMPILERS.size()) { | ||
| 1249 | UNREACHABLE_MSG("Out of bounds operation: {}", index); | ||
| 1250 | return {}; | ||
| 1251 | } | ||
| 1252 | const auto decompiler = OPERATION_DECOMPILERS[index]; | ||
| 1253 | if (decompiler == nullptr) { | ||
| 1254 | UNREACHABLE_MSG("Undefined operation: {}", index); | ||
| 1255 | return {}; | ||
| 1256 | } | ||
| 1257 | return (this->*decompiler)(*operation); | ||
| 1258 | } | ||
| 1259 | |||
| 1260 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1261 | const u32 index = gpr->GetIndex(); | ||
| 1262 | if (index == Register::ZeroIndex) { | ||
| 1263 | return "{0, 0, 0, 0}.x"; | ||
| 1264 | } | ||
| 1265 | return fmt::format("R{}.x", index); | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1269 | return fmt::format("CV{}.x", cv->GetIndex()); | ||
| 1270 | } | ||
| 1271 | |||
| 1272 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1273 | std::string temporary = AllocTemporary(); | ||
| 1274 | AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); | ||
| 1275 | return temporary; | ||
| 1276 | } | ||
| 1277 | |||
| 1278 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1279 | std::string temporary = AllocTemporary(); | ||
| 1280 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1281 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1282 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1283 | break; | ||
| 1284 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1285 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1286 | break; | ||
| 1287 | default: | ||
| 1288 | AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); | ||
| 1289 | break; | ||
| 1290 | } | ||
| 1291 | if (predicate->IsNegated()) { | ||
| 1292 | AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); | ||
| 1293 | } | ||
| 1294 | return temporary; | ||
| 1295 | } | ||
| 1296 | |||
| 1297 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1298 | if (abuf->IsPhysicalBuffer()) { | ||
| 1299 | UNIMPLEMENTED_MSG("Physical buffers are not implemented"); | ||
| 1300 | return "{0, 0, 0, 0}.x"; | ||
| 1301 | } | ||
| 1302 | |||
| 1303 | const Attribute::Index index = abuf->GetIndex(); | ||
| 1304 | const u32 element = abuf->GetElement(); | ||
| 1305 | const char swizzle = Swizzle(element); | ||
| 1306 | switch (index) { | ||
| 1307 | case Attribute::Index::Position: { | ||
| 1308 | if (stage == ShaderType::Geometry) { | ||
| 1309 | return fmt::format("{}_position[{}].{}", StageInputName(stage), | ||
| 1310 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1311 | } else { | ||
| 1312 | return fmt::format("{}.position.{}", StageInputName(stage), swizzle); | ||
| 1313 | } | ||
| 1314 | } | ||
| 1315 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1316 | ASSERT(stage == ShaderType::Vertex); | ||
| 1317 | switch (element) { | ||
| 1318 | case 2: | ||
| 1319 | return "vertex.instance"; | ||
| 1320 | case 3: | ||
| 1321 | return "vertex.id"; | ||
| 1322 | } | ||
| 1323 | UNIMPLEMENTED_MSG("Unhandled TessCoordInstanceIDVertexID element={}", element); | ||
| 1324 | break; | ||
| 1325 | case Attribute::Index::PointCoord: | ||
| 1326 | switch (element) { | ||
| 1327 | case 0: | ||
| 1328 | return "fragment.pointcoord.x"; | ||
| 1329 | case 1: | ||
| 1330 | return "fragment.pointcoord.y"; | ||
| 1331 | } | ||
| 1332 | UNIMPLEMENTED(); | ||
| 1333 | break; | ||
| 1334 | case Attribute::Index::FrontFacing: { | ||
| 1335 | ASSERT(stage == ShaderType::Fragment); | ||
| 1336 | ASSERT(element == 3); | ||
| 1337 | const std::string temporary = AllocVectorTemporary(); | ||
| 1338 | AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); | ||
| 1339 | AddLine("MOV.U.CC RC.x, -RC;"); | ||
| 1340 | AddLine("MOV.S {}.x, 0;", temporary); | ||
| 1341 | AddLine("MOV.S {}.x (NE.x), -1;", temporary); | ||
| 1342 | return fmt::format("{}.x", temporary); | ||
| 1343 | } | ||
| 1344 | default: | ||
| 1345 | if (IsGenericAttribute(index)) { | ||
| 1346 | if (stage == ShaderType::Geometry) { | ||
| 1347 | return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), | ||
| 1348 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1349 | } else { | ||
| 1350 | return fmt::format("{}.attrib[{}].{}", StageInputName(stage), | ||
| 1351 | GetGenericAttributeIndex(index), swizzle); | ||
| 1352 | } | ||
| 1353 | } | ||
| 1354 | UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); | ||
| 1355 | break; | ||
| 1356 | } | ||
| 1357 | return "{0, 0, 0, 0}.x"; | ||
| 1358 | } | ||
| 1359 | |||
| 1360 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1361 | std::string offset_string; | ||
| 1362 | const auto& offset = cbuf->GetOffset(); | ||
| 1363 | if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1364 | offset_string = std::to_string(imm->GetValue()); | ||
| 1365 | } else { | ||
| 1366 | offset_string = Visit(offset); | ||
| 1367 | } | ||
| 1368 | std::string temporary = AllocTemporary(); | ||
| 1369 | AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); | ||
| 1370 | return temporary; | ||
| 1371 | } | ||
| 1372 | |||
| 1373 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1374 | std::string temporary = AllocTemporary(); | ||
| 1375 | AddLine("MOV {}, 0;", temporary); | ||
| 1376 | AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); | ||
| 1377 | return temporary; | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1381 | std::string temporary = Visit(lmem->GetAddress()); | ||
| 1382 | AddLine("SHR.U {}, {}, 2;", temporary, temporary); | ||
| 1383 | AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); | ||
| 1384 | return temporary; | ||
| 1385 | } | ||
| 1386 | |||
| 1387 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1388 | std::string temporary = Visit(smem->GetAddress()); | ||
| 1389 | AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); | ||
| 1390 | return temporary; | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1394 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1395 | return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1399 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1400 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1401 | } | ||
| 1402 | AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); | ||
| 1403 | AddLine("IF NE.x;"); | ||
| 1404 | VisitBlock(conditional->GetCode()); | ||
| 1405 | AddLine("ENDIF;"); | ||
| 1406 | return {}; | ||
| 1407 | } | ||
| 1408 | |||
| 1409 | if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) { | ||
| 1410 | // Uncommenting this would generate invalid code; GLASM does not support // comments. | ||
| 1411 | // AddLine("// {}", cmt->GetText()); | ||
| 1412 | return {}; | ||
| 1413 | } | ||
| 1414 | |||
| 1415 | UNIMPLEMENTED(); | ||
| 1416 | return {}; | ||
| 1417 | } | ||
| 1418 | |||
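| | // Assembles the texture coordinates in one vector register. Shadow cube | ||
| | // arrays need five components, so that case passes a second register whose | ||
| | // .x holds the depth-compare reference. | ||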
| 1419 | std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { | ||
| 1420 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1421 | UNIMPLEMENTED_IF(meta.sampler.is_indexed); | ||
| 1422 | |||
| 1423 | const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && | ||
| 1424 | meta.sampler.type == Tegra::Shader::TextureType::TextureCube; | ||
| 1425 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1426 | std::string temporary = AllocVectorTemporary(); | ||
| 1427 | std::size_t i = 0; | ||
| 1428 | for (; i < count; ++i) { | ||
| 1429 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1430 | } | ||
| 1431 | if (meta.sampler.is_array) { | ||
| 1432 | AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); | ||
| 1433 | ++i; | ||
| 1434 | } | ||
| 1435 | if (meta.sampler.is_shadow) { | ||
| 1436 | std::string compare = Visit(meta.depth_compare); | ||
| 1437 | if (is_extended) { | ||
| 1438 | ASSERT(i == 4); | ||
| 1439 | std::string extra_coord = AllocVectorTemporary(); | ||
| 1440 | AddLine("MOV.F {}.x, {};", extra_coord, compare); | ||
| 1441 | return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; | ||
| 1442 | } | ||
| 1443 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); | ||
| 1444 | ++i; | ||
| 1445 | } | ||
| 1446 | return {temporary, temporary, i}; | ||
| 1447 | } | ||
| 1448 | |||
| 1449 | std::string ARBDecompiler::BuildAoffi(Operation operation) { | ||
| 1450 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1451 | if (meta.aoffi.empty()) { | ||
| 1452 | return {}; | ||
| 1453 | } | ||
| 1454 | const std::string temporary = AllocVectorTemporary(); | ||
| 1455 | std::size_t i = 0; | ||
| 1456 | for (auto& node : meta.aoffi) { | ||
| 1457 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); | ||
| 1458 | } | ||
| 1459 | return fmt::format(", offset({})", temporary); | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { | ||
| 1463 | // Read a bindless SSBO, return its address and set CC accordingly | ||
| 1464 | // address = c[binding].xy | ||
| 1465 | // length = c[binding].z | ||
| 1466 | const u32 binding = global_memory_names.at(gmem.GetDescriptor()); | ||
| 1467 | |||
| 1468 | const std::string pointer = AllocLongVectorTemporary(); | ||
| 1469 | std::string temporary = AllocTemporary(); | ||
| 1470 | |||
| 1471 | AddLine("PK64.U {}, c[{}];", pointer, binding); | ||
| 1472 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), | ||
| 1473 | Visit(gmem.GetBaseAddress())); | ||
| 1474 | AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); | ||
| 1475 | AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); | ||
| 1476 | // Compare offset to length and set CC | ||
| 1477 | AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); | ||
| 1478 | return fmt::format("{}.x", pointer); | ||
| 1479 | } | ||
| 1480 | |||
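| | // Fragment outputs are returned in consecutive registers, one per enabled | ||
| | // color component; the depth output, when present, is read one register | ||
| | // past that sequence. | ||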
| 1481 | void ARBDecompiler::Exit() { | ||
| 1482 | if (stage != ShaderType::Fragment) { | ||
| 1483 | AddLine("RET;"); | ||
| 1484 | return; | ||
| 1485 | } | ||
| 1486 | |||
| 1487 | const auto safe_get_register = [this](u32 reg) -> std::string { | ||
| 1488 | if (ir.GetRegisters().contains(reg)) { | ||
| 1489 | return fmt::format("R{}.x", reg); | ||
| 1490 | } | ||
| 1491 | return "{0, 0, 0, 0}.x"; | ||
| 1492 | }; | ||
| 1493 | |||
| 1494 | const auto& header = ir.GetHeader(); | ||
| 1495 | u32 current_reg = 0; | ||
| 1496 | for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { | ||
| 1497 | for (u32 component = 0; component < 4; ++component) { | ||
| 1498 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1499 | continue; | ||
| 1500 | } | ||
| 1501 | AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), | ||
| 1502 | safe_get_register(current_reg)); | ||
| 1503 | ++current_reg; | ||
| 1504 | } | ||
| 1505 | } | ||
| 1506 | if (header.ps.omap.depth) { | ||
| 1507 | AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); | ||
| 1508 | } | ||
| 1509 | |||
| 1510 | AddLine("RET;"); | ||
| 1511 | } | ||
| 1512 | |||
| 1513 | std::string ARBDecompiler::Assign(Operation operation) { | ||
| 1514 | const Node& dest = operation[0]; | ||
| 1515 | const Node& src = operation[1]; | ||
| 1516 | |||
| 1517 | std::string dest_name; | ||
| 1518 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1519 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1520 | // Writing to Register::ZeroIndex is a no-op | ||
| 1521 | return {}; | ||
| 1522 | } | ||
| 1523 | dest_name = fmt::format("R{}.x", gpr->GetIndex()); | ||
| 1524 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1525 | const u32 element = abuf->GetElement(); | ||
| 1526 | const char swizzle = Swizzle(element); | ||
| 1527 | switch (const Attribute::Index index = abuf->GetIndex()) { | ||
| 1528 | case Attribute::Index::Position: | ||
| 1529 | dest_name = fmt::format("result.position.{}", swizzle); | ||
| 1530 | break; | ||
| 1531 | case Attribute::Index::LayerViewportPointSize: | ||
| 1532 | switch (element) { | ||
| 1533 | case 0: | ||
| 1534 | UNIMPLEMENTED(); | ||
| 1535 | return {}; | ||
| 1536 | case 1: | ||
| 1537 | case 2: | ||
| 1538 | if (!device.HasNvViewportArray2()) { | ||
| 1539 | LOG_ERROR( | ||
| 1540 | Render_OpenGL, | ||
| 1541 | "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); | ||
| 1542 | return {}; | ||
| 1543 | } | ||
| 1544 | dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; | ||
| 1545 | break; | ||
| 1546 | case 3: | ||
| 1547 | dest_name = "result.pointsize.x"; | ||
| 1548 | break; | ||
| 1549 | } | ||
| 1550 | break; | ||
| 1551 | case Attribute::Index::ClipDistances0123: | ||
| 1552 | dest_name = fmt::format("result.clip[{}].x", element); | ||
| 1553 | break; | ||
| 1554 | case Attribute::Index::ClipDistances4567: | ||
| 1555 | dest_name = fmt::format("result.clip[{}].x", element + 4); | ||
| 1556 | break; | ||
| 1557 | default: | ||
| 1558 | if (!IsGenericAttribute(index)) { | ||
| 1559 | UNREACHABLE(); | ||
| 1560 | return {}; | ||
| 1561 | } | ||
| 1562 | dest_name = | ||
| 1563 | fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); | ||
| 1564 | break; | ||
| 1565 | } | ||
| 1566 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1567 | const std::string address = Visit(lmem->GetAddress()); | ||
| 1568 | AddLine("SHR.U {}, {}, 2;", address, address); | ||
| 1569 | dest_name = fmt::format("lmem[{}].x", address); | ||
| 1570 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1571 | AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); | ||
| 1572 | ResetTemporaries(); | ||
| 1573 | return {}; | ||
| 1574 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1575 | AddLine("IF NE.x;"); | ||
| 1576 | AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); | ||
| 1577 | AddLine("ENDIF;"); | ||
| 1578 | ResetTemporaries(); | ||
| 1579 | return {}; | ||
| 1580 | } else { | ||
| 1581 | UNREACHABLE(); | ||
| 1582 | ResetTemporaries(); | ||
| 1583 | return {}; | ||
| 1584 | } | ||
| 1585 | |||
| 1586 | AddLine("MOV.U {}, {};", dest_name, Visit(src)); | ||
| 1587 | ResetTemporaries(); | ||
| 1588 | return {}; | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | std::string ARBDecompiler::Select(Operation operation) { | ||
| 1592 | std::string temporary = AllocTemporary(); | ||
| 1593 | AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), | ||
| 1594 | Visit(operation[2])); | ||
| 1595 | return temporary; | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | std::string ARBDecompiler::FClamp(Operation operation) { | ||
| 1599 | // 1.0f in hex; replace with std::bit_cast once C++20 is available | ||
| 1600 | static constexpr u32 POSITIVE_ONE = 0x3f800000; | ||
| 1601 | |||
| 1602 | std::string temporary = AllocTemporary(); | ||
| 1603 | const Node& value = operation[0]; | ||
| 1604 | const Node& low = operation[1]; | ||
| 1605 | const Node& high = operation[2]; | ||
| 1606 | const auto* const imm_low = std::get_if<ImmediateNode>(&*low); | ||
| 1607 | const auto* const imm_high = std::get_if<ImmediateNode>(&*high); | ||
| 1608 | if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { | ||
| 1609 | AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); | ||
| 1610 | } else { | ||
| 1611 | AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); | ||
| 1612 | AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); | ||
| 1613 | } | ||
| 1614 | return temporary; | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | std::string ARBDecompiler::FCastHalf0(Operation operation) { | ||
| 1618 | const std::string temporary = AllocVectorTemporary(); | ||
| 1619 | AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1620 | return fmt::format("{}.x", temporary); | ||
| 1621 | } | ||
| 1622 | |||
| 1623 | std::string ARBDecompiler::FCastHalf1(Operation operation) { | ||
| 1624 | const std::string temporary = AllocVectorTemporary(); | ||
| 1625 | AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); | ||
| 1626 | AddLine("MOV {}.x, {}.y;", temporary, temporary); | ||
| 1627 | return fmt::format("{}.x", temporary); | ||
| 1628 | } | ||
| 1629 | |||
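| | // GLASM has no direct square root instruction, so sqrt(x) is computed as | ||
| | // RCP(RSQ(x)). | ||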
| 1630 | std::string ARBDecompiler::FSqrt(Operation operation) { | ||
| 1631 | std::string temporary = AllocTemporary(); | ||
| 1632 | AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); | ||
| 1633 | AddLine("RCP.F32 {}, {};", temporary, temporary); | ||
| 1634 | return temporary; | ||
| 1635 | } | ||
| 1636 | |||
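| | // FSWZADD: each thread extracts a 2-bit code from the swizzle operand | ||
| | // using its thread id, then FSWZA/FSWZB (set up in InitializeVariables) | ||
| | // turn that code into the per-lane signs applied to the two operands. | ||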
| 1637 | std::string ARBDecompiler::FSwizzleAdd(Operation operation) { | ||
| 1638 | const std::string temporary = AllocVectorTemporary(); | ||
| 1639 | if (!device.HasWarpIntrinsics()) { | ||
| 1640 | LOG_ERROR(Render_OpenGL, | ||
| 1641 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 1642 | AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); | ||
| 1643 | return fmt::format("{}.x", temporary); | ||
| 1644 | } | ||
| 1645 | |||
| 1646 | AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); | ||
| 1647 | AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); | ||
| 1648 | AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); | ||
| 1649 | AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); | ||
| 1650 | AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); | ||
| 1651 | AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); | ||
| 1652 | AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); | ||
| 1653 | return fmt::format("{}.x", temporary); | ||
| 1654 | } | ||
| 1655 | |||
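| | // The half operations share a common pattern: UP2H unpacks the packed f16 | ||
| | // pair, the arithmetic runs on the unpacked vector, and PK2H packs the | ||
| | // result back into a single 32-bit scalar. | ||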
| 1656 | std::string ARBDecompiler::HAdd2(Operation operation) { | ||
| 1657 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1658 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1659 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1660 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1661 | AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1662 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1663 | return fmt::format("{}.x", tmp1); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | std::string ARBDecompiler::HMul2(Operation operation) { | ||
| 1667 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1668 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1669 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1670 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1671 | AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1672 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1673 | return fmt::format("{}.x", tmp1); | ||
| 1674 | } | ||
| 1675 | |||
| 1676 | std::string ARBDecompiler::HFma2(Operation operation) { | ||
| 1677 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1678 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1679 | const std::string tmp3 = AllocVectorTemporary(); | ||
| 1680 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1681 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1682 | AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); | ||
| 1683 | AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); | ||
| 1684 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1685 | return fmt::format("{}.x", tmp1); | ||
| 1686 | } | ||
| 1687 | |||
| 1688 | std::string ARBDecompiler::HAbsolute(Operation operation) { | ||
| 1689 | const std::string temporary = AllocVectorTemporary(); | ||
| 1690 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1691 | AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); | ||
| 1692 | return fmt::format("{}.x", temporary); | ||
| 1693 | } | ||
| 1694 | |||
| 1695 | std::string ARBDecompiler::HNegate(Operation operation) { | ||
| 1696 | const std::string temporary = AllocVectorTemporary(); | ||
| 1697 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1698 | AddLine("MOVC.S RC.x, {};", Visit(operation[1])); | ||
| 1699 | AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); | ||
| 1700 | AddLine("MOVC.S RC.x, {};", Visit(operation[2])); | ||
| 1701 | AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); | ||
| 1702 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1703 | return fmt::format("{}.x", temporary); | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | std::string ARBDecompiler::HClamp(Operation operation) { | ||
| 1707 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1708 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1709 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1710 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); | ||
| 1711 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1712 | AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1713 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); | ||
| 1714 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1715 | AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1716 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1717 | return fmt::format("{}.x", tmp1); | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | std::string ARBDecompiler::HCastFloat(Operation operation) { | ||
| 1721 | const std::string temporary = AllocVectorTemporary(); | ||
| 1722 | AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); | ||
| 1723 | AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1724 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1725 | return fmt::format("{}.x", temporary); | ||
| 1726 | } | ||
| 1727 | |||
| 1728 | std::string ARBDecompiler::HUnpack(Operation operation) { | ||
| 1729 | std::string operand = Visit(operation[0]); | ||
| 1730 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1731 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1732 | return operand; | ||
| 1733 | case Tegra::Shader::HalfType::F32: { | ||
| 1734 | const std::string temporary = AllocVectorTemporary(); | ||
| 1735 | AddLine("MOV.U {}.x, {};", temporary, operand); | ||
| 1736 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1737 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1738 | return fmt::format("{}.x", temporary); | ||
| 1739 | } | ||
| 1740 | case Tegra::Shader::HalfType::H0_H0: { | ||
| 1741 | const std::string temporary = AllocVectorTemporary(); | ||
| 1742 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1743 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1744 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1745 | return fmt::format("{}.x", temporary); | ||
| 1746 | } | ||
| 1747 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1748 | const std::string temporary = AllocVectorTemporary(); | ||
| 1749 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1750 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 1751 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1752 | return fmt::format("{}.x", temporary); | ||
| 1753 | } | ||
| 1754 | } | ||
| 1755 | UNREACHABLE(); | ||
| 1756 | return "{0, 0, 0, 0}.x"; | ||
| 1757 | } | ||
| 1758 | |||
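HUnpack's four HalfType cases choose which 16-bit lanes survive before repacking. In the packed-word model sketched earlier, the two duplication cases reduce to bit shuffles (DupLow/DupHigh are hypothetical helpers, not part of the original code):

    #include <cstdint>

    constexpr std::uint32_t DupLow(std::uint32_t w) {  // H0_H0: low half in both lanes
        return (w & 0xffffu) | ((w & 0xffffu) << 16);
    }
    constexpr std::uint32_t DupHigh(std::uint32_t w) { // H1_H1: high half in both lanes
        return (w >> 16) | (w & 0xffff0000u);
    }

    static_assert(DupLow(0xAAAABBBBu) == 0xBBBBBBBBu);
    static_assert(DupHigh(0xAAAABBBBu) == 0xAAAAAAAAu);
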
| 1759 | std::string ARBDecompiler::HMergeF32(Operation operation) { | ||
| 1760 | const std::string temporary = AllocVectorTemporary(); | ||
| 1761 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1762 | return fmt::format("{}.x", temporary); | ||
| 1763 | } | ||
| 1764 | |||
| 1765 | std::string ARBDecompiler::HMergeH0(Operation operation) { | ||
| 1766 | const std::string temporary = AllocVectorTemporary(); | ||
| 1767 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1768 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1769 | AddLine("MOV.U {}.x, {}.z;", temporary, temporary); | ||
| 1770 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1771 | return fmt::format("{}.x", temporary); | ||
| 1772 | } | ||
| 1773 | |||
| 1774 | std::string ARBDecompiler::HMergeH1(Operation operation) { | ||
| 1775 | const std::string temporary = AllocVectorTemporary(); | ||
| 1776 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1777 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1778 | AddLine("MOV.U {}.y, {}.w;", temporary, temporary); | ||
| 1779 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1780 | return fmt::format("{}.x", temporary); | ||
| 1781 | } | ||
| 1782 | |||
| 1783 | std::string ARBDecompiler::HPack2(Operation operation) { | ||
| 1784 | const std::string temporary = AllocVectorTemporary(); | ||
| 1785 | AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); | ||
| 1786 | AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); | ||
| 1787 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1788 | return fmt::format("{}.x", temporary); | ||
| 1789 | } | ||
| 1790 | |||
| 1791 | std::string ARBDecompiler::LogicalAssign(Operation operation) { | ||
| 1792 | const Node& dest = operation[0]; | ||
| 1793 | const Node& src = operation[1]; | ||
| 1794 | |||
| 1795 | std::string target; | ||
| 1796 | |||
| 1797 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1798 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1799 | |||
| 1800 | const Tegra::Shader::Pred index = pred->GetIndex(); | ||
| 1801 | switch (index) { | ||
| 1802 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1803 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1804 | // Writing to these predicates is a no-op | ||
| 1805 | return {}; | ||
| 1806 | } | ||
| 1807 | target = fmt::format("P{}.x", static_cast<u64>(index)); | ||
| 1808 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1809 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1810 | target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1811 | } else { | ||
| 1812 | UNREACHABLE(); | ||
| 1813 | ResetTemporaries(); | ||
| 1814 | return {}; | ||
| 1815 | } | ||
| 1816 | |||
| 1817 | AddLine("MOV.U {}, {};", target, Visit(src)); | ||
| 1818 | ResetTemporaries(); | ||
| 1819 | return {}; | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | std::string ARBDecompiler::LogicalPick2(Operation operation) { | ||
| 1823 | std::string temporary = AllocTemporary(); | ||
| 1824 | const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); | ||
| 1825 | AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); | ||
| 1826 | return temporary; | ||
| 1827 | } | ||
| 1828 | |||
| 1829 | std::string ARBDecompiler::LogicalAnd2(Operation operation) { | ||
| 1830 | std::string temporary = AllocTemporary(); | ||
| 1831 | const std::string op = Visit(operation[0]); | ||
| 1832 | AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); | ||
| 1833 | return temporary; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | std::string ARBDecompiler::FloatOrdered(Operation operation) { | ||
| 1837 | std::string temporary = AllocTemporary(); | ||
| 1838 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1839 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1840 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1841 | AddLine("MOV.S {} (NAN.x), 0;", temporary); | ||
| 1842 | AddLine("MOV.S {} (NAN.y), 0;", temporary); | ||
| 1843 | return temporary; | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | std::string ARBDecompiler::FloatUnordered(Operation operation) { | ||
| 1847 | std::string temporary = AllocTemporary(); | ||
| 1848 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1849 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1850 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1851 | AddLine("MOV.S {} (NAN.x), -1;", temporary); | ||
| 1852 | AddLine("MOV.S {} (NAN.y), -1;", temporary); | ||
| 1853 | return temporary; | ||
| 1854 | } | ||
| 1855 | |||
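FloatOrdered and FloatUnordered map the (NAN.x)/(NAN.y) condition-code writes onto the standard IEEE comparison classes; in plain C++ the two predicates reduce to:

    #include <cmath>

    // "Unordered" holds when either operand is NaN; "ordered" is its negation.
    bool IsUnordered(float a, float b) {
        return std::isnan(a) || std::isnan(b);
    }
    bool IsOrdered(float a, float b) {
        return !IsUnordered(a, b);
    }
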
| 1856 | std::string ARBDecompiler::LogicalAddCarry(Operation operation) { | ||
| 1857 | std::string temporary = AllocTemporary(); | ||
| 1858 | AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); | ||
| 1859 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1860 | AddLine("IF CF.x;"); | ||
| 1861 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1862 | AddLine("ENDIF;"); | ||
| 1863 | return temporary; | ||
| 1864 | } | ||
| 1865 | |||
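LogicalAddCarry's ADDC.U/IF CF.x sequence yields an all-ones mask exactly when the 32-bit addition carries. A hypothetical scalar equivalent of what it computes:

    #include <cstdint>

    // -1 (all bits set) when a + b overflows 32 bits, else 0 -- the value the
    // IF CF.x branch writes into the temporary.
    constexpr std::int32_t AddCarry(std::uint32_t a, std::uint32_t b) {
        const std::uint64_t wide = static_cast<std::uint64_t>(a) + b;
        return (wide >> 32) != 0 ? -1 : 0;
    }

    static_assert(AddCarry(0xffffffffu, 1u) == -1);
    static_assert(AddCarry(1u, 2u) == 0);
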
| 1866 | std::string ARBDecompiler::Texture(Operation operation) { | ||
| 1867 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1868 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1869 | const auto [coords, temporary, swizzle] = BuildCoords(operation); | ||
| 1870 | |||
| 1871 | std::string_view opcode = "TEX"; | ||
| 1872 | std::string extra; | ||
| 1873 | if (meta.bias) { | ||
| 1874 | ASSERT(!meta.lod); | ||
| 1875 | opcode = "TXB"; | ||
| 1876 | |||
| 1877 | if (swizzle < 4) { | ||
| 1878 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); | ||
| 1879 | } else { | ||
| 1880 | const std::string bias = AllocTemporary(); | ||
| 1881 | AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); | ||
| 1882 | extra = fmt::format(" {},", bias); | ||
| 1883 | } | ||
| 1884 | } | ||
| 1885 | if (meta.lod) { | ||
| 1886 | ASSERT(!meta.bias); | ||
| 1887 | opcode = "TXL"; | ||
| 1888 | |||
| 1889 | if (swizzle < 4) { | ||
| 1890 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1891 | } else { | ||
| 1892 | const std::string lod = AllocTemporary(); | ||
| 1893 | AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); | ||
| 1894 | extra = fmt::format(" {},", lod); | ||
| 1895 | } | ||
| 1896 | } | ||
| 1897 | |||
| 1898 | AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, | ||
| 1899 | TextureType(meta), BuildAoffi(operation)); | ||
| 1900 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1901 | return fmt::format("{}.x", temporary); | ||
| 1902 | } | ||
| 1903 | |||
| 1904 | std::string ARBDecompiler::TextureGather(Operation operation) { | ||
| 1905 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1906 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1907 | const auto [coords, temporary, swizzle] = BuildCoords(operation); | ||
| 1908 | |||
| 1909 | std::string comp; | ||
| 1910 | if (!meta.sampler.is_shadow) { | ||
| 1911 | const auto& immediate = std::get<ImmediateNode>(*meta.component); | ||
| 1912 | comp = fmt::format(".{}", Swizzle(immediate.GetValue())); | ||
| 1913 | } | ||
| 1914 | |||
| 1915 | AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, | ||
| 1916 | TextureType(meta), BuildAoffi(operation)); | ||
| 1917 | AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); | ||
| 1918 | return fmt::format("{}.x", temporary); | ||
| 1919 | } | ||
| 1920 | |||
| 1921 | std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { | ||
| 1922 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1923 | const std::string temporary = AllocVectorTemporary(); | ||
| 1924 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1925 | |||
| 1926 | ASSERT(!meta.sampler.is_array); | ||
| 1927 | |||
| 1928 | const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; | ||
| 1929 | AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); | ||
| 1930 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1931 | return fmt::format("{}.x", temporary); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | std::string ARBDecompiler::TextureQueryLod(Operation operation) { | ||
| 1935 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1936 | const std::string temporary = AllocVectorTemporary(); | ||
| 1937 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1938 | |||
| 1939 | ASSERT(!meta.sampler.is_array); | ||
| 1940 | |||
| 1941 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1942 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1943 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1944 | } | ||
| 1945 | AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); | ||
| 1946 | AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); | ||
| 1947 | AddLine("TRUNC.S {}, {};", temporary, temporary); | ||
| 1948 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1949 | return fmt::format("{}.x", temporary); | ||
| 1950 | } | ||
| 1951 | |||
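The MUL by {256, 256, 0, 0} followed by TRUNC.S converts the queried LOD into a fixed-point integer (assumption: 8 fractional bits, matching the encoding the guest expects back). A scalar sketch:

    #include <cstdint>

    // Hypothetical equivalent: scale by 2^8, then truncate toward zero.
    constexpr std::int32_t EncodeLod(float lod) {
        return static_cast<std::int32_t>(lod * 256.0f);
    }

    static_assert(EncodeLod(1.5f) == 384);
    static_assert(EncodeLod(0.25f) == 64);
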
| 1952 | std::string ARBDecompiler::TexelFetch(Operation operation) { | ||
| 1953 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1954 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1955 | const auto [coords, temporary, swizzle] = BuildCoords(operation); | ||
| 1956 | |||
| 1957 | if (!meta.sampler.is_buffer) { | ||
| 1958 | ASSERT(swizzle < 4); | ||
| 1959 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1960 | } | ||
| 1961 | AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), | ||
| 1962 | BuildAoffi(operation)); | ||
| 1963 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1964 | return fmt::format("{}.x", temporary); | ||
| 1965 | } | ||
| 1966 | |||
| 1967 | std::string ARBDecompiler::TextureGradient(Operation operation) { | ||
| 1968 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1969 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1970 | const std::string ddx = AllocVectorTemporary(); | ||
| 1971 | const std::string ddy = AllocVectorTemporary(); | ||
| 1972 | const std::string coord = std::get<1>(BuildCoords(operation)); | ||
| 1973 | |||
| 1974 | const std::size_t num_components = meta.derivates.size() / 2; | ||
| 1975 | for (std::size_t index = 0; index < num_components; ++index) { | ||
| 1976 | const char swizzle = Swizzle(index); | ||
| 1977 | AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); | ||
| 1978 | AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | const std::string_view result = coord; | ||
| 1982 | AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, | ||
| 1983 | TextureType(meta), BuildAoffi(operation)); | ||
| 1984 | AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); | ||
| 1985 | return fmt::format("{}.x", result); | ||
| 1986 | } | ||
| 1987 | |||
| 1988 | std::string ARBDecompiler::ImageLoad(Operation operation) { | ||
| 1989 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1990 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1991 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1992 | const std::string_view type = ImageType(meta.image.type); | ||
| 1993 | |||
| 1994 | const std::string temporary = AllocVectorTemporary(); | ||
| 1995 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1996 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1997 | } | ||
| 1998 | AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); | ||
| 1999 | AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 2000 | return fmt::format("{}.x", temporary); | ||
| 2001 | } | ||
| 2002 | |||
| 2003 | std::string ARBDecompiler::ImageStore(Operation operation) { | ||
| 2004 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 2005 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 2006 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 2007 | const std::size_t num_values = meta.values.size(); | ||
| 2008 | const std::string_view type = ImageType(meta.image.type); | ||
| 2009 | |||
| 2010 | const std::string coord = AllocVectorTemporary(); | ||
| 2011 | const std::string value = AllocVectorTemporary(); | ||
| 2012 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 2013 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 2014 | } | ||
| 2015 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 2016 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 2017 | } | ||
| 2018 | AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); | ||
| 2019 | return {}; | ||
| 2020 | } | ||
| 2021 | |||
| 2022 | std::string ARBDecompiler::Branch(Operation operation) { | ||
| 2023 | const auto target = std::get<ImmediateNode>(*operation[0]); | ||
| 2024 | AddLine("MOV.U PC.x, {};", target.GetValue()); | ||
| 2025 | AddLine("CONT;"); | ||
| 2026 | return {}; | ||
| 2027 | } | ||
| 2028 | |||
| 2029 | std::string ARBDecompiler::BranchIndirect(Operation operation) { | ||
| 2030 | AddLine("MOV.U PC.x, {};", Visit(operation[0])); | ||
| 2031 | AddLine("CONT;"); | ||
| 2032 | return {}; | ||
| 2033 | } | ||
| 2034 | |||
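Writing PC.x and issuing CONT only makes sense inside the decompiler's loop-based dispatcher: every basic block is guarded by a comparison against PC, and CONT restarts the loop so control lands on the newly selected block. A rough C++ analogue (block labels hypothetical):

    #include <cstdint>

    void Dispatch(std::uint32_t entry_label) {
        std::uint32_t pc = entry_label;
        for (;;) {              // REP ... ENDREP
            if (pc == 0) {
                // ... block 0 body ...
                pc = 4;         // Branch: MOV.U PC.x, 4; CONT;
                continue;
            }
            if (pc == 4) {
                // ... block 4 body ...
                return;         // Exit
            }
            return;             // unknown label: bail out
        }
    }
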
| 2035 | std::string ARBDecompiler::PushFlowStack(Operation operation) { | ||
| 2036 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2037 | const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); | ||
| 2038 | const std::string_view stack_name = StackName(stack); | ||
| 2039 | AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); | ||
| 2040 | AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 2041 | return {}; | ||
| 2042 | } | ||
| 2043 | |||
| 2044 | std::string ARBDecompiler::PopFlowStack(Operation operation) { | ||
| 2045 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2046 | const std::string_view stack_name = StackName(stack); | ||
| 2047 | AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 2048 | AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); | ||
| 2049 | AddLine("CONT;"); | ||
| 2050 | return {}; | ||
| 2051 | } | ||
| 2052 | |||
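The SSY/PBK flow stacks behave like small call stacks: a push stores a target label and bumps the top pointer, a pop does the reverse and redirects PC. A hypothetical in-memory model (the capacity here is arbitrary):

    #include <array>
    #include <cstdint>

    struct FlowStack {
        std::array<std::uint32_t, 20> data{}; // the {stack}[...] storage
        std::uint32_t top = 0;                // {stack}_TOP.x

        void Push(std::uint32_t target) { data[top++] = target; } // MOV; ADD.S
        std::uint32_t Pop() { return data[--top]; }               // SUB.S; MOV
    };
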
| 2053 | std::string ARBDecompiler::Exit(Operation) { | ||
| 2054 | Exit(); | ||
| 2055 | return {}; | ||
| 2056 | } | ||
| 2057 | |||
| 2058 | std::string ARBDecompiler::Discard(Operation) { | ||
| 2059 | AddLine("KIL TR;"); | ||
| 2060 | return {}; | ||
| 2061 | } | ||
| 2062 | |||
| 2063 | std::string ARBDecompiler::EmitVertex(Operation) { | ||
| 2064 | AddLine("EMIT;"); | ||
| 2065 | return {}; | ||
| 2066 | } | ||
| 2067 | |||
| 2068 | std::string ARBDecompiler::EndPrimitive(Operation) { | ||
| 2069 | AddLine("ENDPRIM;"); | ||
| 2070 | return {}; | ||
| 2071 | } | ||
| 2072 | |||
| 2073 | std::string ARBDecompiler::InvocationId(Operation) { | ||
| 2074 | return "primitive.invocation"; | ||
| 2075 | } | ||
| 2076 | |||
| 2077 | std::string ARBDecompiler::YNegate(Operation) { | ||
| 2078 | LOG_WARNING(Render_OpenGL, "(STUBBED)"); | ||
| 2079 | std::string temporary = AllocTemporary(); | ||
| 2080 | AddLine("MOV.F {}, 1;", temporary); | ||
| 2081 | return temporary; | ||
| 2082 | } | ||
| 2083 | |||
| 2084 | std::string ARBDecompiler::ThreadId(Operation) { | ||
| 2085 | return fmt::format("{}.threadid", StageInputName(stage)); | ||
| 2086 | } | ||
| 2087 | |||
| 2088 | std::string ARBDecompiler::ShuffleIndexed(Operation operation) { | ||
| 2089 | if (!device.HasWarpIntrinsics()) { | ||
| 2090 | LOG_ERROR(Render_OpenGL, | ||
| 2091 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 2092 | return Visit(operation[0]); | ||
| 2093 | } | ||
| 2094 | const std::string temporary = AllocVectorTemporary(); | ||
| 2095 | AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), | ||
| 2096 | Visit(operation[1])); | ||
| 2097 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 2098 | return fmt::format("{}.x", temporary); | ||
| 2099 | } | ||
| 2100 | |||
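SHFIDX.U with the {31, 0, 0, 0} mask reads another lane's value within a 32-wide warp. A scalar model of the semantics (assuming a full warp, with the mask clamping the index):

    #include <array>
    #include <cstdint>

    // Each invocation returns the value held by lane (index & 31) of its warp.
    std::uint32_t ShuffleIndexedLane(const std::array<std::uint32_t, 32>& lanes,
                                     std::uint32_t index) {
        return lanes[index & 31];
    }
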
| 2101 | std::string ARBDecompiler::Barrier(Operation) { | ||
| 2102 | AddLine("BAR;"); | ||
| 2103 | return {}; | ||
| 2104 | } | ||
| 2105 | |||
| 2106 | std::string ARBDecompiler::MemoryBarrierGroup(Operation) { | ||
| 2107 | AddLine("MEMBAR.CTA;"); | ||
| 2108 | return {}; | ||
| 2109 | } | ||
| 2110 | |||
| 2111 | std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { | ||
| 2112 | AddLine("MEMBAR;"); | ||
| 2113 | return {}; | ||
| 2114 | } | ||
| 2115 | |||
| 2116 | } // Anonymous namespace | ||
| 2117 | |||
| 2118 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 2119 | const VideoCommon::Shader::Registry& registry, | ||
| 2120 | Tegra::Engines::ShaderType stage, std::string_view identifier) { | ||
| 2121 | return ARBDecompiler(device, ir, registry, stage, identifier).Code(); | ||
| 2122 | } | ||
| 2123 | |||
| 2124 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null | |||
| @@ -1,29 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Tegra::Engines { | ||
| 13 | enum class ShaderType : u32; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | class Registry; | ||
| 19 | } // namespace VideoCommon::Shader | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | class Device; | ||
| 24 | |||
| 25 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 26 | const VideoCommon::Shader::Registry& registry, | ||
| 27 | Tegra::Engines::ShaderType stage, std::string_view identifier); | ||
| 28 | |||
| 29 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ceb3abcb2..3551dbdcc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -54,40 +54,6 @@ namespace { | |||
| 54 | 54 | ||
| 55 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | 55 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; |
| 56 | 56 | ||
| 57 | struct TextureHandle { | ||
| 58 | constexpr TextureHandle(u32 data, bool via_header_index) { | ||
| 59 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 60 | image = handle.tic_id; | ||
| 61 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 62 | } | ||
| 63 | |||
| 64 | u32 image; | ||
| 65 | u32 sampler; | ||
| 66 | }; | ||
| 67 | |||
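TextureHandle splits one 32-bit handle word into a TIC (image descriptor) index and a TSC (sampler descriptor) index; with via_header_index the sampler simply reuses the image index. A sketch assuming the usual Maxwell layout of 20 TIC bits below 12 TSC bits:

    #include <cstdint>

    constexpr std::uint32_t TicId(std::uint32_t handle) {
        return handle & 0xfffffu; // bits [0, 20): texture image (TIC) index
    }
    constexpr std::uint32_t TscId(std::uint32_t handle) {
        return handle >> 20;      // bits [20, 32): sampler (TSC) index
    }
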
| 68 | template <typename Engine, typename Entry> | ||
| 69 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, | ||
| 70 | ShaderType shader_type, size_t index = 0) { | ||
| 71 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 72 | if (entry.is_separated) { | ||
| 73 | const u32 buffer_1 = entry.buffer; | ||
| 74 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 75 | const u32 offset_1 = entry.offset; | ||
| 76 | const u32 offset_2 = entry.secondary_offset; | ||
| 77 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 78 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 79 | return TextureHandle(handle_1 | handle_2, via_header_index); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | if (entry.is_bindless) { | ||
| 83 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | ||
| 84 | return TextureHandle(raw, via_header_index); | ||
| 85 | } | ||
| 86 | const u32 buffer = engine.GetBoundBuffer(); | ||
| 87 | const u64 offset = (entry.offset + index) * sizeof(u32); | ||
| 88 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||
| 89 | } | ||
| 90 | |||
| 91 | /// Translates hardware transform feedback indices | 57 | /// Translates hardware transform feedback indices |
| 92 | /// @param location Hardware location | 58 | /// @param location Hardware location |
| 93 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments | 59 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments |
| @@ -119,44 +85,6 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { | |||
| 119 | void oglEnable(GLenum cap, bool state) { | 85 | void oglEnable(GLenum cap, bool state) { |
| 120 | (state ? glEnable : glDisable)(cap); | 86 | (state ? glEnable : glDisable)(cap); |
| 121 | } | 87 | } |
| 122 | |||
| 123 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 124 | if (entry.is_buffer) { | ||
| 125 | return ImageViewType::Buffer; | ||
| 126 | } | ||
| 127 | switch (entry.type) { | ||
| 128 | case Tegra::Shader::TextureType::Texture1D: | ||
| 129 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 130 | case Tegra::Shader::TextureType::Texture2D: | ||
| 131 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 132 | case Tegra::Shader::TextureType::Texture3D: | ||
| 133 | return ImageViewType::e3D; | ||
| 134 | case Tegra::Shader::TextureType::TextureCube: | ||
| 135 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 136 | } | ||
| 137 | UNREACHABLE(); | ||
| 138 | return ImageViewType::e2D; | ||
| 139 | } | ||
| 140 | |||
| 141 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 142 | switch (entry.type) { | ||
| 143 | case Tegra::Shader::ImageType::Texture1D: | ||
| 144 | return ImageViewType::e1D; | ||
| 145 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 146 | return ImageViewType::e1DArray; | ||
| 147 | case Tegra::Shader::ImageType::Texture2D: | ||
| 148 | return ImageViewType::e2D; | ||
| 149 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 150 | return ImageViewType::e2DArray; | ||
| 151 | case Tegra::Shader::ImageType::Texture3D: | ||
| 152 | return ImageViewType::e3D; | ||
| 153 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 154 | return ImageViewType::Buffer; | ||
| 155 | } | ||
| 156 | UNREACHABLE(); | ||
| 157 | return ImageViewType::e2D; | ||
| 158 | } | ||
| 159 | |||
| 160 | } // Anonymous namespace | 88 | } // Anonymous namespace |
| 161 | 89 | ||
| 162 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 90 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| @@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 172 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 100 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 173 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | 101 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), |
| 174 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), | 102 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), |
| 175 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 103 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} |
| 176 | async_shaders(emu_window_) { | ||
| 177 | if (device.UseAsynchronousShaders()) { | ||
| 178 | async_shaders.AllocateWorkers(); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | 104 | ||
| 182 | RasterizerOpenGL::~RasterizerOpenGL() = default; | 105 | RasterizerOpenGL::~RasterizerOpenGL() = default; |
| 183 | 106 | ||
| @@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() { | |||
| 244 | } | 167 | } |
| 245 | } | 168 | } |
| 246 | 169 | ||
| 247 | void RasterizerOpenGL::SetupShaders(bool is_indexed) { | ||
| 248 | u32 clip_distances = 0; | ||
| 249 | |||
| 250 | std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; | ||
| 251 | image_view_indices.clear(); | ||
| 252 | sampler_handles.clear(); | ||
| 253 | |||
| 254 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 255 | |||
| 256 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 257 | const auto& shader_config = maxwell3d.regs.shader_config[index]; | ||
| 258 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 259 | |||
| 260 | // Skip stages that are not enabled | ||
| 261 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 262 | switch (program) { | ||
| 263 | case Maxwell::ShaderProgram::Geometry: | ||
| 264 | program_manager.UseGeometryShader(0); | ||
| 265 | break; | ||
| 266 | case Maxwell::ShaderProgram::Fragment: | ||
| 267 | program_manager.UseFragmentShader(0); | ||
| 268 | break; | ||
| 269 | default: | ||
| 270 | break; | ||
| 271 | } | ||
| 272 | continue; | ||
| 273 | } | ||
| 274 | // Currently these stages are not supported in the OpenGL backend. | ||
| 275 | // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL | ||
| 276 | if (program == Maxwell::ShaderProgram::TesselationControl || | ||
| 277 | program == Maxwell::ShaderProgram::TesselationEval) { | ||
| 278 | continue; | ||
| 279 | } | ||
| 280 | |||
| 281 | Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); | ||
| 282 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; | ||
| 283 | switch (program) { | ||
| 284 | case Maxwell::ShaderProgram::VertexA: | ||
| 285 | case Maxwell::ShaderProgram::VertexB: | ||
| 286 | program_manager.UseVertexShader(program_handle); | ||
| 287 | break; | ||
| 288 | case Maxwell::ShaderProgram::Geometry: | ||
| 289 | program_manager.UseGeometryShader(program_handle); | ||
| 290 | break; | ||
| 291 | case Maxwell::ShaderProgram::Fragment: | ||
| 292 | program_manager.UseFragmentShader(program_handle); | ||
| 293 | break; | ||
| 294 | default: | ||
| 295 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | ||
| 296 | shader_config.enable.Value(), shader_config.offset); | ||
| 297 | break; | ||
| 298 | } | ||
| 299 | |||
| 300 | // Stage indices are 0 - 5 | ||
| 301 | const size_t stage = index == 0 ? 0 : index - 1; | ||
| 302 | shaders[stage] = shader; | ||
| 303 | |||
| 304 | SetupDrawTextures(shader, stage); | ||
| 305 | SetupDrawImages(shader, stage); | ||
| 306 | |||
| 307 | buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); | ||
| 308 | |||
| 309 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 310 | u32 ssbo_index = 0; | ||
| 311 | for (const auto& buffer : shader->GetEntries().global_memory_entries) { | ||
| 312 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 313 | buffer.cbuf_offset, buffer.is_written); | ||
| 314 | ++ssbo_index; | ||
| 315 | } | ||
| 316 | |||
| 317 | // Workaround for Intel drivers. | ||
| 318 | // When a clip distance is enabled but not written in the shader, it crops parts of the | ||
| 319 | // screen (sometimes half the screen, sometimes three quarters). To avoid this, enable a | ||
| 320 | // clip distance only when it is written by a shader stage. | ||
| 321 | clip_distances |= shader->GetEntries().clip_distances; | ||
| 322 | |||
| 323 | // When VertexA is enabled, we have dual vertex shaders | ||
| 324 | if (program == Maxwell::ShaderProgram::VertexA) { | ||
| 325 | // VertexB was combined with VertexA, so we skip the VertexB iteration | ||
| 326 | ++index; | ||
| 327 | } | ||
| 328 | } | ||
| 329 | SyncClipEnabled(clip_distances); | ||
| 330 | maxwell3d.dirty.flags[Dirty::Shaders] = false; | ||
| 331 | |||
| 332 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 333 | |||
| 334 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 335 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 336 | |||
| 337 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 338 | |||
| 339 | size_t image_view_index = 0; | ||
| 340 | size_t texture_index = 0; | ||
| 341 | size_t image_index = 0; | ||
| 342 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 343 | const Shader* const shader = shaders[stage]; | ||
| 344 | if (!shader) { | ||
| 345 | continue; | ||
| 346 | } | ||
| 347 | buffer_cache.BindHostStageBuffers(stage); | ||
| 348 | const auto& base = device.GetBaseBindings(stage); | ||
| 349 | BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | ||
| 350 | texture_index, image_index); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
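The "Stage indices are 0 - 5" mapping above collapses the two vertex program slots into a single pipeline stage. Stated standalone (hypothetical helper, not in the original):

    #include <cstddef>

    // Program index 0 (VertexA) and 1 (VertexB) both map to stage 0; every
    // later program shifts down by one.
    constexpr std::size_t StageFromProgram(std::size_t program_index) {
        return program_index == 0 ? 0 : program_index - 1;
    }

    static_assert(StageFromProgram(0) == 0); // VertexA
    static_assert(StageFromProgram(1) == 0); // VertexB
    static_assert(StageFromProgram(4) == 3); // Geometry
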
| 354 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 170 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 355 | const VideoCore::DiskResourceLoadCallback& callback) { | 171 | const VideoCore::DiskResourceLoadCallback& callback) {} |
| 356 | shader_cache.LoadDiskCache(title_id, stop_loading, callback); | ||
| 357 | } | ||
| 358 | 172 | ||
| 359 | void RasterizerOpenGL::Clear() { | 173 | void RasterizerOpenGL::Clear() { |
| 360 | MICROPROFILE_SCOPE(OpenGL_Clears); | 174 | MICROPROFILE_SCOPE(OpenGL_Clears); |
| @@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 434 | 248 | ||
| 435 | // Setup shaders and their used resources. | 249 | // Setup shaders and their used resources. |
| 436 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 250 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 437 | SetupShaders(is_indexed); | ||
| 438 | 251 | ||
| 439 | texture_cache.UpdateRenderTargets(false); | 252 | texture_cache.UpdateRenderTargets(false); |
| 440 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | 253 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); |
| @@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 488 | gpu.TickWork(); | 301 | gpu.TickWork(); |
| 489 | } | 302 | } |
| 490 | 303 | ||
| 491 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 304 | void RasterizerOpenGL::DispatchCompute() { |
| 492 | Shader* const kernel = shader_cache.GetComputeKernel(code_addr); | 305 | UNREACHABLE_MSG("Not implemented"); |
| 493 | |||
| 494 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 495 | BindComputeTextures(kernel); | ||
| 496 | |||
| 497 | const auto& entries = kernel->GetEntries(); | ||
| 498 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||
| 499 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 500 | u32 ssbo_index = 0; | ||
| 501 | for (const auto& buffer : entries.global_memory_entries) { | ||
| 502 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 503 | buffer.is_written); | ||
| 504 | ++ssbo_index; | ||
| 505 | } | ||
| 506 | buffer_cache.UpdateComputeBuffers(); | ||
| 507 | buffer_cache.BindHostComputeBuffers(); | ||
| 508 | |||
| 509 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 510 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | ||
| 511 | ++num_queued_commands; | ||
| 512 | } | 306 | } |
| 513 | 307 | ||
| 514 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | 308 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { |
| @@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 726 | return true; | 520 | return true; |
| 727 | } | 521 | } |
| 728 | 522 | ||
| 729 | void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { | ||
| 730 | image_view_indices.clear(); | ||
| 731 | sampler_handles.clear(); | ||
| 732 | |||
| 733 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 734 | |||
| 735 | SetupComputeTextures(kernel); | ||
| 736 | SetupComputeImages(kernel); | ||
| 737 | |||
| 738 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 739 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 740 | |||
| 741 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 742 | size_t image_view_index = 0; | ||
| 743 | size_t texture_index = 0; | ||
| 744 | size_t image_index = 0; | ||
| 745 | BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); | ||
| 746 | } | ||
| 747 | |||
| 748 | void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, | ||
| 749 | GLuint base_image, size_t& image_view_index, | ||
| 750 | size_t& texture_index, size_t& image_index) { | ||
| 751 | const GLuint* const samplers = sampler_handles.data() + texture_index; | ||
| 752 | const GLuint* const textures = texture_handles.data() + texture_index; | ||
| 753 | const GLuint* const images = image_handles.data() + image_index; | ||
| 754 | |||
| 755 | const size_t num_samplers = entries.samplers.size(); | ||
| 756 | for (const auto& sampler : entries.samplers) { | ||
| 757 | for (size_t i = 0; i < sampler.size; ++i) { | ||
| 758 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 759 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 760 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); | ||
| 761 | texture_handles[texture_index++] = handle; | ||
| 762 | } | ||
| 763 | } | ||
| 764 | const size_t num_images = entries.images.size(); | ||
| 765 | for (size_t unit = 0; unit < num_images; ++unit) { | ||
| 766 | // TODO: Mark as modified | ||
| 767 | const ImageViewId image_view_id = image_view_ids[image_view_index++]; | ||
| 768 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 769 | const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); | ||
| 770 | image_handles[image_index] = handle; | ||
| 771 | ++image_index; | ||
| 772 | } | ||
| 773 | if (num_samplers > 0) { | ||
| 774 | glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); | ||
| 775 | glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); | ||
| 776 | } | ||
| 777 | if (num_images > 0) { | ||
| 778 | glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); | ||
| 779 | } | ||
| 780 | } | ||
| 781 | |||
| 782 | void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { | ||
| 783 | const bool via_header_index = | ||
| 784 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 785 | for (const auto& entry : shader->GetEntries().samplers) { | ||
| 786 | const auto shader_type = static_cast<ShaderType>(stage_index); | ||
| 787 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 788 | const auto handle = | ||
| 789 | GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); | ||
| 790 | const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 791 | sampler_handles.push_back(sampler->Handle()); | ||
| 792 | image_view_indices.push_back(handle.image); | ||
| 793 | } | ||
| 794 | } | ||
| 795 | } | ||
| 796 | |||
| 797 | void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { | ||
| 798 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 799 | for (const auto& entry : kernel->GetEntries().samplers) { | ||
| 800 | for (size_t i = 0; i < entry.size; ++i) { | ||
| 801 | const auto handle = | ||
| 802 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); | ||
| 803 | const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 804 | sampler_handles.push_back(sampler->Handle()); | ||
| 805 | image_view_indices.push_back(handle.image); | ||
| 806 | } | ||
| 807 | } | ||
| 808 | } | ||
| 809 | |||
| 810 | void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { | ||
| 811 | const bool via_header_index = | ||
| 812 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 813 | for (const auto& entry : shader->GetEntries().images) { | ||
| 814 | const auto shader_type = static_cast<ShaderType>(stage_index); | ||
| 815 | const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); | ||
| 816 | image_view_indices.push_back(handle.image); | ||
| 817 | } | ||
| 818 | } | ||
| 819 | |||
| 820 | void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { | ||
| 821 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 822 | for (const auto& entry : shader->GetEntries().images) { | ||
| 823 | const auto handle = | ||
| 824 | GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); | ||
| 825 | image_view_indices.push_back(handle.image); | ||
| 826 | } | ||
| 827 | } | ||
| 828 | |||
| 829 | void RasterizerOpenGL::SyncState() { | 523 | void RasterizerOpenGL::SyncState() { |
| 830 | SyncViewport(); | 524 | SyncViewport(); |
| 831 | SyncRasterizeEnable(); | 525 | SyncRasterizeEnable(); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..1f58f8791 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -28,11 +28,9 @@ | |||
| 28 | #include "video_core/renderer_opengl/gl_query_cache.h" | 28 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 29 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 33 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 32 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 33 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 35 | #include "video_core/shader/async_shaders.h" | ||
| 36 | #include "video_core/textures/texture.h" | 34 | #include "video_core/textures/texture.h" |
| 37 | 35 | ||
| 38 | namespace Core::Memory { | 36 | namespace Core::Memory { |
| @@ -81,7 +79,7 @@ public: | |||
| 81 | 79 | ||
| 82 | void Draw(bool is_indexed, bool is_instanced) override; | 80 | void Draw(bool is_indexed, bool is_instanced) override; |
| 83 | void Clear() override; | 81 | void Clear() override; |
| 84 | void DispatchCompute(GPUVAddr code_addr) override; | 82 | void DispatchCompute() override; |
| 85 | void ResetCounter(VideoCore::QueryType type) override; | 83 | void ResetCounter(VideoCore::QueryType type) override; |
| 86 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 84 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 87 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 85 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| @@ -118,36 +116,11 @@ public: | |||
| 118 | return num_queued_commands > 0; | 116 | return num_queued_commands > 0; |
| 119 | } | 117 | } |
| 120 | 118 | ||
| 121 | VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | ||
| 122 | return async_shaders; | ||
| 123 | } | ||
| 124 | |||
| 125 | const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||
| 126 | return async_shaders; | ||
| 127 | } | ||
| 128 | |||
| 129 | private: | 119 | private: |
| 130 | static constexpr size_t MAX_TEXTURES = 192; | 120 | static constexpr size_t MAX_TEXTURES = 192; |
| 131 | static constexpr size_t MAX_IMAGES = 48; | 121 | static constexpr size_t MAX_IMAGES = 48; |
| 132 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; | 122 | static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; |
| 133 | 123 | ||
| 134 | void BindComputeTextures(Shader* kernel); | ||
| 135 | |||
| 136 | void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, | ||
| 137 | size_t& image_view_index, size_t& texture_index, size_t& image_index); | ||
| 138 | |||
| 139 | /// Configures the current textures to use for the draw command. | ||
| 140 | void SetupDrawTextures(const Shader* shader, size_t stage_index); | ||
| 141 | |||
| 142 | /// Configures the textures used in a compute shader. | ||
| 143 | void SetupComputeTextures(const Shader* kernel); | ||
| 144 | |||
| 145 | /// Configures images in a graphics shader. | ||
| 146 | void SetupDrawImages(const Shader* shader, size_t stage_index); | ||
| 147 | |||
| 148 | /// Configures images in a compute shader. | ||
| 149 | void SetupComputeImages(const Shader* shader); | ||
| 150 | |||
| 151 | /// Syncs state to match guest's | 124 | /// Syncs state to match guest's |
| 152 | void SyncState(); | 125 | void SyncState(); |
| 153 | 126 | ||
| @@ -230,8 +203,6 @@ private: | |||
| 230 | /// End a transform feedback | 203 | /// End a transform feedback |
| 231 | void EndTransformFeedback(); | 204 | void EndTransformFeedback(); |
| 232 | 205 | ||
| 233 | void SetupShaders(bool is_indexed); | ||
| 234 | |||
| 235 | Tegra::GPU& gpu; | 206 | Tegra::GPU& gpu; |
| 236 | Tegra::Engines::Maxwell3D& maxwell3d; | 207 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 237 | Tegra::Engines::KeplerCompute& kepler_compute; | 208 | Tegra::Engines::KeplerCompute& kepler_compute; |
| @@ -251,8 +222,6 @@ private: | |||
| 251 | AccelerateDMA accelerate_dma; | 222 | AccelerateDMA accelerate_dma; |
| 252 | FenceManagerOpenGL fence_manager; | 223 | FenceManagerOpenGL fence_manager; |
| 253 | 224 | ||
| 254 | VideoCommon::Shader::AsyncShaders async_shaders; | ||
| 255 | |||
| 256 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | 225 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 257 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | 226 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
| 258 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; | 227 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..4dd166156 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -20,307 +20,19 @@ | |||
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 25 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 24 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 26 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||
| 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 26 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 30 | #include "video_core/shader/memory_util.h" | ||
| 31 | #include "video_core/shader/registry.h" | ||
| 32 | #include "video_core/shader/shader_ir.h" | ||
| 33 | #include "video_core/shader_cache.h" | 27 | #include "video_core/shader_cache.h" |
| 34 | #include "video_core/shader_notify.h" | 28 | #include "video_core/shader_notify.h" |
| 35 | 29 | ||
| 36 | namespace OpenGL { | 30 | namespace OpenGL { |
| 37 | 31 | ||
| 38 | using Tegra::Engines::ShaderType; | 32 | Shader::Shader() = default; |
| 39 | using VideoCommon::Shader::GetShaderAddress; | ||
| 40 | using VideoCommon::Shader::GetShaderCode; | ||
| 41 | using VideoCommon::Shader::GetUniqueIdentifier; | ||
| 42 | using VideoCommon::Shader::KERNEL_MAIN_OFFSET; | ||
| 43 | using VideoCommon::Shader::ProgramCode; | ||
| 44 | using VideoCommon::Shader::Registry; | ||
| 45 | using VideoCommon::Shader::ShaderIR; | ||
| 46 | using VideoCommon::Shader::STAGE_MAIN_OFFSET; | ||
| 47 | |||
| 48 | namespace { | ||
| 49 | |||
| 50 | constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; | ||
| 51 | |||
| 52 | /// Gets the shader type from a Maxwell program type | ||
| 53 | constexpr GLenum GetGLShaderType(ShaderType shader_type) { | ||
| 54 | switch (shader_type) { | ||
| 55 | case ShaderType::Vertex: | ||
| 56 | return GL_VERTEX_SHADER; | ||
| 57 | case ShaderType::Geometry: | ||
| 58 | return GL_GEOMETRY_SHADER; | ||
| 59 | case ShaderType::Fragment: | ||
| 60 | return GL_FRAGMENT_SHADER; | ||
| 61 | case ShaderType::Compute: | ||
| 62 | return GL_COMPUTE_SHADER; | ||
| 63 | default: | ||
| 64 | return GL_NONE; | ||
| 65 | } | ||
| 66 | } | ||
| 67 | |||
| 68 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { | ||
| 69 | switch (shader_type) { | ||
| 70 | case ShaderType::Vertex: | ||
| 71 | return "VS"; | ||
| 72 | case ShaderType::TesselationControl: | ||
| 73 | return "HS"; | ||
| 74 | case ShaderType::TesselationEval: | ||
| 75 | return "DS"; | ||
| 76 | case ShaderType::Geometry: | ||
| 77 | return "GS"; | ||
| 78 | case ShaderType::Fragment: | ||
| 79 | return "FS"; | ||
| 80 | case ShaderType::Compute: | ||
| 81 | return "CS"; | ||
| 82 | } | ||
| 83 | return "UNK"; | ||
| 84 | } | ||
| 85 | |||
| 86 | constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | ||
| 87 | switch (program_type) { | ||
| 88 | case Maxwell::ShaderProgram::VertexA: | ||
| 89 | case Maxwell::ShaderProgram::VertexB: | ||
| 90 | return ShaderType::Vertex; | ||
| 91 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 92 | return ShaderType::TesselationControl; | ||
| 93 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 94 | return ShaderType::TesselationEval; | ||
| 95 | case Maxwell::ShaderProgram::Geometry: | ||
| 96 | return ShaderType::Geometry; | ||
| 97 | case Maxwell::ShaderProgram::Fragment: | ||
| 98 | return ShaderType::Fragment; | ||
| 99 | } | ||
| 100 | return {}; | ||
| 101 | } | ||
| 102 | |||
| 103 | constexpr GLenum AssemblyEnum(ShaderType shader_type) { | ||
| 104 | switch (shader_type) { | ||
| 105 | case ShaderType::Vertex: | ||
| 106 | return GL_VERTEX_PROGRAM_NV; | ||
| 107 | case ShaderType::TesselationControl: | ||
| 108 | return GL_TESS_CONTROL_PROGRAM_NV; | ||
| 109 | case ShaderType::TesselationEval: | ||
| 110 | return GL_TESS_EVALUATION_PROGRAM_NV; | ||
| 111 | case ShaderType::Geometry: | ||
| 112 | return GL_GEOMETRY_PROGRAM_NV; | ||
| 113 | case ShaderType::Fragment: | ||
| 114 | return GL_FRAGMENT_PROGRAM_NV; | ||
| 115 | case ShaderType::Compute: | ||
| 116 | return GL_COMPUTE_PROGRAM_NV; | ||
| 117 | } | ||
| 118 | return {}; | ||
| 119 | } | ||
| 120 | |||
| 121 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { | ||
| 122 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | ||
| 123 | } | ||
| 124 | |||
| 125 | std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | ||
| 126 | const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; | ||
| 127 | const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, | ||
| 128 | entry.graphics_info, entry.compute_info}; | ||
| 129 | auto registry = std::make_shared<Registry>(entry.type, info); | ||
| 130 | for (const auto& [address, value] : entry.keys) { | ||
| 131 | const auto [buffer, offset] = address; | ||
| 132 | registry->InsertKey(buffer, offset, value); | ||
| 133 | } | ||
| 134 | for (const auto& [offset, sampler] : entry.bound_samplers) { | ||
| 135 | registry->InsertBoundSampler(offset, sampler); | ||
| 136 | } | ||
| 137 | for (const auto& [key, sampler] : entry.bindless_samplers) { | ||
| 138 | const auto [buffer, offset] = key; | ||
| 139 | registry->InsertBindlessSampler(buffer, offset, sampler); | ||
| 140 | } | ||
| 141 | return registry; | ||
| 142 | } | ||
| 143 | |||
| 144 | std::unordered_set<GLenum> GetSupportedFormats() { | ||
| 145 | GLint num_formats; | ||
| 146 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | ||
| 147 | |||
| 148 | std::vector<GLint> formats(num_formats); | ||
| 149 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | ||
| 150 | |||
| 151 | std::unordered_set<GLenum> supported_formats; | ||
| 152 | for (const GLint format : formats) { | ||
| 153 | supported_formats.insert(static_cast<GLenum>(format)); | ||
| 154 | } | ||
| 155 | return supported_formats; | ||
| 156 | } | ||
| 157 | |||
| 158 | } // Anonymous namespace | ||
| 159 | |||
| 160 | ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, | ||
| 161 | const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { | ||
| 162 | if (device.UseDriverCache()) { | ||
| 163 | // Ignore hint retrievable if we are using the driver cache | ||
| 164 | hint_retrievable = false; | ||
| 165 | } | ||
| 166 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); | ||
| 167 | LOG_INFO(Render_OpenGL, "{}", shader_id); | ||
| 168 | |||
| 169 | auto program = std::make_shared<ProgramHandle>(); | ||
| 170 | |||
| 171 | if (device.UseAssemblyShaders()) { | ||
| 172 | const std::string arb = | ||
| 173 | DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); | ||
| 174 | |||
| 175 | GLuint& arb_prog = program->assembly_program.handle; | ||
| 176 | |||
| 177 | // The commented-out functions signal OpenGL errors but are compatible with apitrace. | ||
| 178 | // Use them only to capture and replay on apitrace. | ||
| 179 | #if 0 | ||
| 180 | glGenProgramsNV(1, &arb_prog); | ||
| 181 | glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), | ||
| 182 | reinterpret_cast<const GLubyte*>(arb.data())); | ||
| 183 | #else | ||
| 184 | glGenProgramsARB(1, &arb_prog); | ||
| 185 | glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, | ||
| 186 | static_cast<GLsizei>(arb.size()), arb.data()); | ||
| 187 | #endif | ||
| 188 | const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); | ||
| 189 | if (err && *err) { | ||
| 190 | LOG_CRITICAL(Render_OpenGL, "{}", err); | ||
| 191 | LOG_INFO(Render_OpenGL, "\n{}", arb); | ||
| 192 | } | ||
| 193 | } else { | ||
| 194 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | ||
| 195 | OGLShader shader; | ||
| 196 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | ||
| 197 | |||
| 198 | program->source_program.Create(true, hint_retrievable, shader.handle); | ||
| 199 | } | ||
| 200 | |||
| 201 | return program; | ||
| 202 | } | ||
| 203 | |||
| 204 | Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, | ||
| 205 | ProgramSharedPtr program_, bool is_built_) | ||
| 206 | : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, | ||
| 207 | is_built{is_built_} { | ||
| 208 | handle = program->assembly_program.handle; | ||
| 209 | if (handle == 0) { | ||
| 210 | handle = program->source_program.handle; | ||
| 211 | } | ||
| 212 | if (is_built) { | ||
| 213 | ASSERT(handle != 0); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | 33 | ||
| 217 | Shader::~Shader() = default; | 34 | Shader::~Shader() = default; |
| 218 | 35 | ||
| 219 | GLuint Shader::GetHandle() const { | ||
| 220 | DEBUG_ASSERT(registry->IsConsistent()); | ||
| 221 | return handle; | ||
| 222 | } | ||
| 223 | |||
| 224 | bool Shader::IsBuilt() const { | ||
| 225 | return is_built; | ||
| 226 | } | ||
| 227 | |||
| 228 | void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { | ||
| 229 | program->source_program = std::move(new_program); | ||
| 230 | handle = program->source_program.handle; | ||
| 231 | is_built = true; | ||
| 232 | } | ||
| 233 | |||
| 234 | void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { | ||
| 235 | program->assembly_program = std::move(new_program); | ||
| 236 | handle = program->assembly_program.handle; | ||
| 237 | is_built = true; | ||
| 238 | } | ||
| 239 | |||
| 240 | std::unique_ptr<Shader> Shader::CreateStageFromMemory( | ||
| 241 | const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, | ||
| 242 | ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { | ||
| 243 | const auto shader_type = GetShaderType(program_type); | ||
| 244 | |||
| 245 | auto& gpu = params.gpu; | ||
| 246 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 247 | |||
| 248 | auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); | ||
| 249 | if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { | ||
| 250 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 251 | // TODO(Rodrigo): Handle VertexA shaders | ||
| 252 | // std::optional<ShaderIR> ir_b; | ||
| 253 | // if (!code_b.empty()) { | ||
| 254 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | ||
| 255 | // } | ||
| 256 | auto program = | ||
| 257 | BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); | ||
| 258 | ShaderDiskCacheEntry entry; | ||
| 259 | entry.type = shader_type; | ||
| 260 | entry.code = std::move(code); | ||
| 261 | entry.code_b = std::move(code_b); | ||
| 262 | entry.unique_identifier = params.unique_identifier; | ||
| 263 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 264 | entry.graphics_info = registry->GetGraphicsInfo(); | ||
| 265 | entry.keys = registry->GetKeys(); | ||
| 266 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 267 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 268 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 269 | |||
| 270 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 271 | |||
| 272 | return std::unique_ptr<Shader>(new Shader(std::move(registry), | ||
| 273 | MakeEntries(params.device, ir, shader_type), | ||
| 274 | std::move(program), true)); | ||
| 275 | } else { | ||
| 276 | // Required for entries | ||
| 277 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 278 | auto entries = MakeEntries(params.device, ir, shader_type); | ||
| 279 | |||
| 280 | async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, | ||
| 281 | std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, | ||
| 282 | COMPILER_SETTINGS, *registry, cpu_addr); | ||
| 283 | |||
| 284 | auto program = std::make_shared<ProgramHandle>(); | ||
| 285 | return std::unique_ptr<Shader>( | ||
| 286 | new Shader(std::move(registry), std::move(entries), std::move(program), false)); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, | ||
| 291 | ProgramCode code) { | ||
| 292 | auto& gpu = params.gpu; | ||
| 293 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 294 | |||
| 295 | auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine); | ||
| 296 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 297 | const u64 uid = params.unique_identifier; | ||
| 298 | auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); | ||
| 299 | |||
| 300 | ShaderDiskCacheEntry entry; | ||
| 301 | entry.type = ShaderType::Compute; | ||
| 302 | entry.code = std::move(code); | ||
| 303 | entry.unique_identifier = uid; | ||
| 304 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 305 | entry.compute_info = registry->GetComputeInfo(); | ||
| 306 | entry.keys = registry->GetKeys(); | ||
| 307 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 308 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 309 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 310 | |||
| 311 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 312 | |||
| 313 | return std::unique_ptr<Shader>(new Shader(std::move(registry), | ||
| 314 | MakeEntries(params.device, ir, ShaderType::Compute), | ||
| 315 | std::move(program))); | ||
| 316 | } | ||
| 317 | |||
| 318 | std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, | ||
| 319 | const PrecompiledShader& precompiled_shader) { | ||
| 320 | return std::unique_ptr<Shader>(new Shader( | ||
| 321 | precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); | ||
| 322 | } | ||
| 323 | |||
| 324 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, | 36 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, |
| 325 | Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 37 | Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 326 | Tegra::Engines::Maxwell3D& maxwell3d_, | 38 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| @@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, | |||
| 331 | 43 | ||
| 332 | ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; | 44 | ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; |
| 333 | 45 | ||
| 334 | void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, | ||
| 335 | const VideoCore::DiskResourceLoadCallback& callback) { | ||
| 336 | disk_cache.BindTitleID(title_id); | ||
| 337 | const std::optional transferable = disk_cache.LoadTransferable(); | ||
| 338 | |||
| 339 | LOG_INFO(Render_OpenGL, "Total Shader Count: {}", | ||
| 340 | transferable.has_value() ? transferable->size() : 0); | ||
| 341 | |||
| 342 | if (!transferable) { | ||
| 343 | return; | ||
| 344 | } | ||
| 345 | |||
| 346 | std::vector<ShaderDiskCachePrecompiled> gl_cache; | ||
| 347 | if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { | ||
| 348 | // Only load the precompiled cache when not using assembly shaders or the driver cache | ||
| 349 | gl_cache = disk_cache.LoadPrecompiled(); | ||
| 350 | } | ||
| 351 | const auto supported_formats = GetSupportedFormats(); | ||
| 352 | |||
| 353 | // Track if precompiled cache was altered during loading to know if we have to | ||
| 354 | // serialize the virtual precompiled cache file back to the hard drive | ||
| 355 | bool precompiled_cache_altered = false; | ||
| 356 | |||
| 357 | // Inform the frontend about shader build initialization | ||
| 358 | if (callback) { | ||
| 359 | callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); | ||
| 360 | } | ||
| 361 | |||
| 362 | std::mutex mutex; | ||
| 363 | std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex | ||
| 364 | std::atomic_bool gl_cache_failed = false; | ||
| 365 | |||
| 366 | const auto find_precompiled = [&gl_cache](u64 id) { | ||
| 367 | return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); | ||
| 368 | }; | ||
| 369 | |||
| 370 | const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | ||
| 371 | std::size_t end) { | ||
| 372 | const auto scope = context->Acquire(); | ||
| 373 | |||
| 374 | for (std::size_t i = begin; i < end; ++i) { | ||
| 375 | if (stop_loading.stop_requested()) { | ||
| 376 | return; | ||
| 377 | } | ||
| 378 | const auto& entry = (*transferable)[i]; | ||
| 379 | const u64 uid = entry.unique_identifier; | ||
| 380 | const auto it = find_precompiled(uid); | ||
| 381 | const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; | ||
| 382 | |||
| 383 | const bool is_compute = entry.type == ShaderType::Compute; | ||
| 384 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 385 | auto registry = MakeRegistry(entry); | ||
| 386 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); | ||
| 387 | |||
| 388 | ProgramSharedPtr program; | ||
| 389 | if (precompiled_entry) { | ||
| 390 | // If the shader is precompiled, attempt to load it with the stored program binary | ||
| 391 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | ||
| 392 | if (!program) { | ||
| 393 | gl_cache_failed = true; | ||
| 394 | } | ||
| 395 | } | ||
| 396 | if (!program) { | ||
| 397 | // Otherwise compile it from GLSL | ||
| 398 | program = BuildShader(device, entry.type, uid, ir, *registry, true); | ||
| 399 | } | ||
| 400 | |||
| 401 | PrecompiledShader shader; | ||
| 402 | shader.program = std::move(program); | ||
| 403 | shader.registry = std::move(registry); | ||
| 404 | shader.entries = MakeEntries(device, ir, entry.type); | ||
| 405 | |||
| 406 | std::scoped_lock lock{mutex}; | ||
| 407 | if (callback) { | ||
| 408 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | ||
| 409 | transferable->size()); | ||
| 410 | } | ||
| 411 | runtime_cache.emplace(entry.unique_identifier, std::move(shader)); | ||
| 412 | } | ||
| 413 | }; | ||
| 414 | |||
| 415 | const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; | ||
| 416 | const std::size_t bucket_size{transferable->size() / num_workers}; | ||
| 417 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | ||
| 418 | std::vector<std::thread> threads(num_workers); | ||
| 419 | for (std::size_t i = 0; i < num_workers; ++i) { | ||
| 420 | const bool is_last_worker = i + 1 == num_workers; | ||
| 421 | const std::size_t start{bucket_size * i}; | ||
| 422 | const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; | ||
| 423 | |||
| 424 | // On some platforms the shared context has to be created from the GUI thread | ||
| 425 | contexts[i] = emu_window.CreateSharedContext(); | ||
| 426 | threads[i] = std::thread(worker, contexts[i].get(), start, end); | ||
| 427 | } | ||
| 428 | for (auto& thread : threads) { | ||
| 429 | thread.join(); | ||
| 430 | } | ||
| 431 | |||
| 432 | if (gl_cache_failed) { | ||
| 433 | // Invalidate the precompiled cache if a dumped shader was rejected by the driver | ||
| 434 | disk_cache.InvalidatePrecompiled(); | ||
| 435 | precompiled_cache_altered = true; | ||
| 436 | return; | ||
| 437 | } | ||
| 438 | if (stop_loading.stop_requested()) { | ||
| 439 | return; | ||
| 440 | } | ||
| 441 | |||
| 442 | if (device.UseAssemblyShaders() || device.UseDriverCache()) { | ||
| 443 | // Don't store precompiled binaries for assembly shaders or when using the driver cache | ||
| 444 | return; | ||
| 445 | } | ||
| 446 | |||
| 447 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | ||
| 448 | // before precompiling them | ||
| 449 | |||
| 450 | for (std::size_t i = 0; i < transferable->size(); ++i) { | ||
| 451 | const u64 id = (*transferable)[i].unique_identifier; | ||
| 452 | const auto it = find_precompiled(id); | ||
| 453 | if (it == gl_cache.end()) { | ||
| 454 | const GLuint program = runtime_cache.at(id).program->source_program.handle; | ||
| 455 | disk_cache.SavePrecompiled(id, program); | ||
| 456 | precompiled_cache_altered = true; | ||
| 457 | } | ||
| 458 | } | ||
| 459 | |||
| 460 | if (precompiled_cache_altered) { | ||
| 461 | disk_cache.SaveVirtualPrecompiledFile(); | ||
| 462 | } | ||
| 463 | } | ||
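The worker partitioning at the end of LoadDiskCache splits the transferable list into contiguous buckets and lets the last worker absorb the remainder of the integer division. A standalone sketch of that arithmetic (the shader count here is made up):

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <thread>

int main() {
    const std::size_t total = 103; // e.g. number of transferable shaders
    const std::size_t num_workers =
        std::max<std::size_t>(1, std::thread::hardware_concurrency());
    const std::size_t bucket_size = total / num_workers;

    for (std::size_t i = 0; i < num_workers; ++i) {
        const bool is_last_worker = i + 1 == num_workers;
        const std::size_t begin = bucket_size * i;
        // The last worker takes whatever the integer division left over
        const std::size_t end = is_last_worker ? total : begin + bucket_size;
        std::printf("worker %zu -> [%zu, %zu)\n", i, begin, end);
    }
}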
| 464 | |||
| 465 | ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||
| 466 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | ||
| 467 | const std::unordered_set<GLenum>& supported_formats) { | ||
| 468 | if (!supported_formats.contains(precompiled_entry.binary_format)) { | ||
| 469 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); | ||
| 470 | return {}; | ||
| 471 | } | ||
| 472 | |||
| 473 | auto program = std::make_shared<ProgramHandle>(); | ||
| 474 | GLuint& handle = program->source_program.handle; | ||
| 475 | handle = glCreateProgram(); | ||
| 476 | glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | ||
| 477 | glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), | ||
| 478 | static_cast<GLsizei>(precompiled_entry.binary.size())); | ||
| 479 | |||
| 480 | GLint link_status; | ||
| 481 | glGetProgramiv(handle, GL_LINK_STATUS, &link_status); | ||
| 482 | if (link_status == GL_FALSE) { | ||
| 483 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); | ||
| 484 | return {}; | ||
| 485 | } | ||
| 486 | |||
| 487 | return program; | ||
| 488 | } | ||
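GeneratePrecompiledProgram is the standard glProgramBinary load-and-validate pattern. A sketch of that pattern in isolation; it assumes a current OpenGL context and an already-initialized function loader such as glad, neither of which is part of the code above, and it cannot execute without them:

#include <vector>

#include <glad/glad.h> // assumed loader; any GL 4.1+ loader works

// Returns 0 when the driver rejects the cached binary (e.g. after a driver
// or GPU change); the caller then falls back to compiling from source.
GLuint LoadProgramBinary(GLenum binary_format, const std::vector<char>& binary) {
    const GLuint handle = glCreateProgram();
    glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
    glProgramBinary(handle, binary_format, binary.data(),
                    static_cast<GLsizei>(binary.size()));

    GLint link_status = GL_FALSE;
    glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
    if (link_status != GL_TRUE) {
        glDeleteProgram(handle);
        return 0;
    }
    return handle;
}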
| 489 | |||
| 490 | Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, | ||
| 491 | VideoCommon::Shader::AsyncShaders& async_shaders) { | ||
| 492 | if (!maxwell3d.dirty.flags[Dirty::Shaders]) { | ||
| 493 | auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; | ||
| 494 | if (last_shader->IsBuilt()) { | ||
| 495 | return last_shader; | ||
| 496 | } | ||
| 497 | } | ||
| 498 | |||
| 499 | const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; | ||
| 500 | |||
| 501 | if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { | ||
| 502 | auto completed_work = async_shaders.GetCompletedWork(); | ||
| 503 | for (auto& work : completed_work) { | ||
| 504 | Shader* shader = TryGet(work.cpu_address); | ||
| 505 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 506 | if (shader == nullptr) { | ||
| 507 | continue; | ||
| 508 | } | ||
| 509 | using namespace VideoCommon::Shader; | ||
| 510 | if (work.backend == AsyncShaders::Backend::OpenGL) { | ||
| 511 | shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); | ||
| 512 | } else if (work.backend == AsyncShaders::Backend::GLASM) { | ||
| 513 | shader->AsyncGLASMBuilt(std::move(work.program.glasm)); | ||
| 514 | } | ||
| 515 | |||
| 516 | auto& registry = shader->GetRegistry(); | ||
| 517 | |||
| 518 | ShaderDiskCacheEntry entry; | ||
| 519 | entry.type = work.shader_type; | ||
| 520 | entry.code = std::move(work.code); | ||
| 521 | entry.code_b = std::move(work.code_b); | ||
| 522 | entry.unique_identifier = work.uid; | ||
| 523 | entry.bound_buffer = registry.GetBoundBuffer(); | ||
| 524 | entry.graphics_info = registry.GetGraphicsInfo(); | ||
| 525 | entry.keys = registry.GetKeys(); | ||
| 526 | entry.bound_samplers = registry.GetBoundSamplers(); | ||
| 527 | entry.bindless_samplers = registry.GetBindlessSamplers(); | ||
| 528 | disk_cache.SaveEntry(std::move(entry)); | ||
| 529 | } | ||
| 530 | } | ||
| 531 | |||
| 532 | // Look up shader in the cache based on address | ||
| 533 | const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; | ||
| 534 | if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { | ||
| 535 | return last_shaders[static_cast<std::size_t>(program)] = shader; | ||
| 536 | } | ||
| 537 | |||
| 538 | const u8* const host_ptr{gpu_memory.GetPointer(address)}; | ||
| 539 | |||
| 540 | // No shader found - create a new one | ||
| 541 | ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; | ||
| 542 | ProgramCode code_b; | ||
| 543 | if (program == Maxwell::ShaderProgram::VertexA) { | ||
| 544 | const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; | ||
| 545 | const u8* host_ptr_b = gpu_memory.GetPointer(address_b); | ||
| 546 | code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); | ||
| 547 | } | ||
| 548 | const std::size_t code_size = code.size() * sizeof(u64); | ||
| 549 | |||
| 550 | const u64 unique_identifier = GetUniqueIdentifier( | ||
| 551 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | ||
| 552 | |||
| 553 | const ShaderParameters params{gpu, maxwell3d, disk_cache, device, | ||
| 554 | *cpu_addr, host_ptr, unique_identifier}; | ||
| 555 | |||
| 556 | std::unique_ptr<Shader> shader; | ||
| 557 | const auto found = runtime_cache.find(unique_identifier); | ||
| 558 | if (found == runtime_cache.end()) { | ||
| 559 | shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), | ||
| 560 | async_shaders, cpu_addr.value_or(0)); | ||
| 561 | } else { | ||
| 562 | shader = Shader::CreateFromCache(params, found->second); | ||
| 563 | } | ||
| 564 | |||
| 565 | Shader* const result = shader.get(); | ||
| 566 | if (cpu_addr) { | ||
| 567 | Register(std::move(shader), *cpu_addr, code_size); | ||
| 568 | } else { | ||
| 569 | null_shader = std::move(shader); | ||
| 570 | } | ||
| 571 | |||
| 572 | return last_shaders[static_cast<std::size_t>(program)] = result; | ||
| 573 | } | ||
| 574 | |||
| 575 | Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||
| 576 | const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; | ||
| 577 | |||
| 578 | if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { | ||
| 579 | return kernel; | ||
| 580 | } | ||
| 581 | |||
| 582 | // No kernel found, create a new one | ||
| 583 | const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; | ||
| 584 | ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; | ||
| 585 | const std::size_t code_size{code.size() * sizeof(u64)}; | ||
| 586 | const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; | ||
| 587 | |||
| 588 | const ShaderParameters params{gpu, kepler_compute, disk_cache, device, | ||
| 589 | *cpu_addr, host_ptr, unique_identifier}; | ||
| 590 | |||
| 591 | std::unique_ptr<Shader> kernel; | ||
| 592 | const auto found = runtime_cache.find(unique_identifier); | ||
| 593 | if (found == runtime_cache.end()) { | ||
| 594 | kernel = Shader::CreateKernelFromMemory(params, std::move(code)); | ||
| 595 | } else { | ||
| 596 | kernel = Shader::CreateFromCache(params, found->second); | ||
| 597 | } | ||
| 598 | |||
| 599 | Shader* const result = kernel.get(); | ||
| 600 | if (cpu_addr) { | ||
| 601 | Register(std::move(kernel), *cpu_addr, code_size); | ||
| 602 | } else { | ||
| 603 | null_kernel = std::move(kernel); | ||
| 604 | } | ||
| 605 | return result; | ||
| 606 | } | ||
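GetStageProgram and GetComputeKernel share the same shape: key by the code's unique identifier, reuse a cached entry when one exists, otherwise build and register the new object. A compact sketch with the GPU types stubbed out (names are illustrative only):

#include <cstdint>
#include <memory>
#include <unordered_map>

struct Kernel {
    std::uint64_t uid;
};

class TinyKernelCache {
public:
    Kernel* GetOrCreate(std::uint64_t uid) {
        if (const auto it = cache.find(uid); it != cache.end()) {
            return it->second.get(); // cache hit: no rebuild
        }
        auto kernel = std::make_unique<Kernel>(Kernel{uid});
        Kernel* const result = kernel.get();
        cache.emplace(uid, std::move(kernel)); // register for later lookups
        return result;
    }

private:
    std::unordered_map<std::uint64_t, std::unique_ptr<Kernel>> cache;
};

int main() {
    TinyKernelCache cache;
    Kernel* const first = cache.GetOrCreate(0xDEADBEEF);
    Kernel* const second = cache.GetOrCreate(0xDEADBEEF);
    return first == second ? 0 : 1; // the second call must hit the cache
}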
| 607 | |||
| 608 | } // namespace OpenGL | 46 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..ad3d15a76 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -19,10 +19,6 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "video_core/engines/shader_type.h" | 20 | #include "video_core/engines/shader_type.h" |
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||
| 24 | #include "video_core/shader/registry.h" | ||
| 25 | #include "video_core/shader/shader_ir.h" | ||
| 26 | #include "video_core/shader_cache.h" | 22 | #include "video_core/shader_cache.h" |
| 27 | 23 | ||
| 28 | namespace Tegra { | 24 | namespace Tegra { |
| @@ -33,10 +29,6 @@ namespace Core::Frontend { | |||
| 33 | class EmuWindow; | 29 | class EmuWindow; |
| 34 | } | 30 | } |
| 35 | 31 | ||
| 36 | namespace VideoCommon::Shader { | ||
| 37 | class AsyncShaders; | ||
| 38 | } | ||
| 39 | |||
| 40 | namespace OpenGL { | 32 | namespace OpenGL { |
| 41 | 33 | ||
| 42 | class Device; | 34 | class Device; |
| @@ -44,77 +36,10 @@ class RasterizerOpenGL; | |||
| 44 | 36 | ||
| 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 37 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 46 | 38 | ||
| 47 | struct ProgramHandle { | 39 | class Shader { |
| 48 | OGLProgram source_program; | ||
| 49 | OGLAssemblyProgram assembly_program; | ||
| 50 | }; | ||
| 51 | using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; | ||
| 52 | |||
| 53 | struct PrecompiledShader { | ||
| 54 | ProgramSharedPtr program; | ||
| 55 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||
| 56 | ShaderEntries entries; | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct ShaderParameters { | ||
| 60 | Tegra::GPU& gpu; | ||
| 61 | Tegra::Engines::ConstBufferEngineInterface& engine; | ||
| 62 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 63 | const Device& device; | ||
| 64 | VAddr cpu_addr; | ||
| 65 | const u8* host_ptr; | ||
| 66 | u64 unique_identifier; | ||
| 67 | }; | ||
| 68 | |||
| 69 | ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, | ||
| 70 | u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, | ||
| 71 | const VideoCommon::Shader::Registry& registry, | ||
| 72 | bool hint_retrievable = false); | ||
| 73 | |||
| 74 | class Shader final { | ||
| 75 | public: | 40 | public: |
| 41 | explicit Shader(); | ||
| 76 | ~Shader(); | 42 | ~Shader(); |
| 77 | |||
| 78 | /// Gets the GL program handle for the shader | ||
| 79 | GLuint GetHandle() const; | ||
| 80 | |||
| 81 | bool IsBuilt() const; | ||
| 82 | |||
| 83 | /// Gets the shader entries for the shader | ||
| 84 | const ShaderEntries& GetEntries() const { | ||
| 85 | return entries; | ||
| 86 | } | ||
| 87 | |||
| 88 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 89 | return *registry; | ||
| 90 | } | ||
| 91 | |||
| 92 | /// Mark a OpenGL shader as built | ||
| 93 | void AsyncOpenGLBuilt(OGLProgram new_program); | ||
| 94 | |||
| 95 | /// Mark a GLASM shader as built | ||
| 96 | void AsyncGLASMBuilt(OGLAssemblyProgram new_program); | ||
| 97 | |||
| 98 | static std::unique_ptr<Shader> CreateStageFromMemory( | ||
| 99 | const ShaderParameters& params, Maxwell::ShaderProgram program_type, | ||
| 100 | ProgramCode program_code, ProgramCode program_code_b, | ||
| 101 | VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); | ||
| 102 | |||
| 103 | static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, | ||
| 104 | ProgramCode code); | ||
| 105 | |||
| 106 | static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, | ||
| 107 | const PrecompiledShader& precompiled_shader); | ||
| 108 | |||
| 109 | private: | ||
| 110 | explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, | ||
| 111 | ProgramSharedPtr program, bool is_built_ = true); | ||
| 112 | |||
| 113 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||
| 114 | ShaderEntries entries; | ||
| 115 | ProgramSharedPtr program; | ||
| 116 | GLuint handle = 0; | ||
| 117 | bool is_built{}; | ||
| 118 | }; | 43 | }; |
| 119 | 44 | ||
| 120 | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { | 45 | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { |
| @@ -126,36 +51,13 @@ public: | |||
| 126 | Tegra::MemoryManager& gpu_memory_, const Device& device_); | 51 | Tegra::MemoryManager& gpu_memory_, const Device& device_); |
| 127 | ~ShaderCacheOpenGL() override; | 52 | ~ShaderCacheOpenGL() override; |
| 128 | 53 | ||
| 129 | /// Loads disk cache for the current game | ||
| 130 | void LoadDiskCache(u64 title_id, std::stop_token stop_loading, | ||
| 131 | const VideoCore::DiskResourceLoadCallback& callback); | ||
| 132 | |||
| 133 | /// Gets the current specified shader stage program | ||
| 134 | Shader* GetStageProgram(Maxwell::ShaderProgram program, | ||
| 135 | VideoCommon::Shader::AsyncShaders& async_shaders); | ||
| 136 | |||
| 137 | /// Gets a compute kernel in the passed address | ||
| 138 | Shader* GetComputeKernel(GPUVAddr code_addr); | ||
| 139 | |||
| 140 | private: | 54 | private: |
| 141 | ProgramSharedPtr GeneratePrecompiledProgram( | ||
| 142 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | ||
| 143 | const std::unordered_set<GLenum>& supported_formats); | ||
| 144 | |||
| 145 | Core::Frontend::EmuWindow& emu_window; | 55 | Core::Frontend::EmuWindow& emu_window; |
| 146 | Tegra::GPU& gpu; | 56 | Tegra::GPU& gpu; |
| 147 | Tegra::MemoryManager& gpu_memory; | 57 | Tegra::MemoryManager& gpu_memory; |
| 148 | Tegra::Engines::Maxwell3D& maxwell3d; | 58 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 149 | Tegra::Engines::KeplerCompute& kepler_compute; | 59 | Tegra::Engines::KeplerCompute& kepler_compute; |
| 150 | const Device& device; | 60 | const Device& device; |
| 151 | |||
| 152 | ShaderDiskCacheOpenGL disk_cache; | ||
| 153 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | ||
| 154 | |||
| 155 | std::unique_ptr<Shader> null_shader; | ||
| 156 | std::unique_ptr<Shader> null_kernel; | ||
| 157 | |||
| 158 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; | ||
| 159 | }; | 61 | }; |
| 160 | 62 | ||
| 161 | } // namespace OpenGL | 63 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null | |||
| @@ -1,2986 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <string> | ||
| 7 | #include <string_view> | ||
| 8 | #include <utility> | ||
| 9 | #include <variant> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <fmt/format.h> | ||
| 13 | |||
| 14 | #include "common/alignment.h" | ||
| 15 | #include "common/assert.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "common/div_ceil.h" | ||
| 18 | #include "common/logging/log.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | ||
| 20 | #include "video_core/engines/shader_type.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 24 | #include "video_core/shader/ast.h" | ||
| 25 | #include "video_core/shader/node.h" | ||
| 26 | #include "video_core/shader/shader_ir.h" | ||
| 27 | #include "video_core/shader/transform_feedback.h" | ||
| 28 | |||
| 29 | namespace OpenGL { | ||
| 30 | |||
| 31 | namespace { | ||
| 32 | |||
| 33 | using Tegra::Engines::ShaderType; | ||
| 34 | using Tegra::Shader::Attribute; | ||
| 35 | using Tegra::Shader::Header; | ||
| 36 | using Tegra::Shader::IpaInterpMode; | ||
| 37 | using Tegra::Shader::IpaMode; | ||
| 38 | using Tegra::Shader::IpaSampleMode; | ||
| 39 | using Tegra::Shader::PixelImap; | ||
| 40 | using Tegra::Shader::Register; | ||
| 41 | using Tegra::Shader::TextureType; | ||
| 42 | |||
| 43 | using namespace VideoCommon::Shader; | ||
| 44 | using namespace std::string_literals; | ||
| 45 | |||
| 46 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 47 | using Operation = const OperationNode&; | ||
| 48 | |||
| 49 | class ASTDecompiler; | ||
| 50 | class ExprDecompiler; | ||
| 51 | |||
| 52 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||
| 53 | |||
| 54 | constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; | ||
| 55 | |||
| 56 | constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; | ||
| 57 | constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; | ||
| 58 | |||
| 59 | struct TextureOffset {}; | ||
| 60 | struct TextureDerivates {}; | ||
| 61 | using TextureArgument = std::pair<Type, Node>; | ||
| 62 | using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; | ||
| 63 | |||
| 64 | constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); | ||
| 65 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); | ||
| 66 | |||
| 67 | constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt | ||
| 68 | #define ftou floatBitsToUint | ||
| 69 | #define itof intBitsToFloat | ||
| 70 | #define utof uintBitsToFloat | ||
| 71 | |||
| 72 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | ||
| 73 | bvec2 is_nan1 = isnan(pair1); | ||
| 74 | bvec2 is_nan2 = isnan(pair2); | ||
| 75 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 76 | }} | ||
| 77 | |||
| 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 80 | )"; | ||
| 81 | |||
| 82 | class ShaderWriter final { | ||
| 83 | public: | ||
| 84 | void AddExpression(std::string_view text) { | ||
| 85 | DEBUG_ASSERT(scope >= 0); | ||
| 86 | if (!text.empty()) { | ||
| 87 | AppendIndentation(); | ||
| 88 | } | ||
| 89 | shader_source += text; | ||
| 90 | } | ||
| 91 | |||
| 92 | // Forwards all arguments directly to libfmt. | ||
| 93 | // Note that all formatting requirements for fmt must be | ||
| 94 | // obeyed when using this function. (e.g. {{ must be used when | ||
| 95 | // printing the character '{' is desired. Ditto for }} and '}', | ||
| 96 | // etc). | ||
| 97 | template <typename... Args> | ||
| 98 | void AddLine(std::string_view text, Args&&... args) { | ||
| 99 | AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...)); | ||
| 100 | AddNewLine(); | ||
| 101 | } | ||
| 102 | |||
| 103 | void AddNewLine() { | ||
| 104 | DEBUG_ASSERT(scope >= 0); | ||
| 105 | shader_source += '\n'; | ||
| 106 | } | ||
| 107 | |||
| 108 | std::string GenerateTemporary() { | ||
| 109 | return fmt::format("tmp{}", temporary_index++); | ||
| 110 | } | ||
| 111 | |||
| 112 | std::string GetResult() { | ||
| 113 | return std::move(shader_source); | ||
| 114 | } | ||
| 115 | |||
| 116 | s32 scope = 0; | ||
| 117 | |||
| 118 | private: | ||
| 119 | void AppendIndentation() { | ||
| 120 | shader_source.append(static_cast<std::size_t>(scope) * 4, ' '); | ||
| 121 | } | ||
| 122 | |||
| 123 | std::string shader_source; | ||
| 124 | u32 temporary_index = 1; | ||
| 125 | }; | ||
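A short usage sketch for a writer like the one above, showing the libfmt escaping rule its comment warns about: {{ and }} emit literal braces, which generated GLSL needs constantly. The add_line lambda is a stand-in for AddLine, not the real API:

#include <cstddef>
#include <cstdio>
#include <string>
#include <string_view>

#include <fmt/format.h>

int main() {
    std::string source;
    int scope = 0;

    const auto add_line = [&](std::string_view text) {
        source.append(static_cast<std::size_t>(scope) * 4, ' ');
        source += text;
        source += '\n';
    };

    add_line(fmt::format("void main() {{")); // "{{" emits a literal '{'
    ++scope;
    add_line("color = vec4(1.0);");
    --scope;
    add_line(fmt::format("}}")); // "}}" emits a literal '}'

    std::fputs(source.c_str(), stdout);
}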
| 126 | |||
| 127 | class Expression final { | ||
| 128 | public: | ||
| 129 | Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { | ||
| 130 | ASSERT(type != Type::Void); | ||
| 131 | } | ||
| 132 | Expression() : type{Type::Void} {} | ||
| 133 | |||
| 134 | Type GetType() const { | ||
| 135 | return type; | ||
| 136 | } | ||
| 137 | |||
| 138 | std::string GetCode() const { | ||
| 139 | return code; | ||
| 140 | } | ||
| 141 | |||
| 142 | void CheckVoid() const { | ||
| 143 | ASSERT(type == Type::Void); | ||
| 144 | } | ||
| 145 | |||
| 146 | std::string As(Type type_) const { | ||
| 147 | switch (type_) { | ||
| 148 | case Type::Bool: | ||
| 149 | return AsBool(); | ||
| 150 | case Type::Bool2: | ||
| 151 | return AsBool2(); | ||
| 152 | case Type::Float: | ||
| 153 | return AsFloat(); | ||
| 154 | case Type::Int: | ||
| 155 | return AsInt(); | ||
| 156 | case Type::Uint: | ||
| 157 | return AsUint(); | ||
| 158 | case Type::HalfFloat: | ||
| 159 | return AsHalfFloat(); | ||
| 160 | default: | ||
| 161 | UNREACHABLE_MSG("Invalid type"); | ||
| 162 | return code; | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | std::string AsBool() const { | ||
| 167 | switch (type) { | ||
| 168 | case Type::Bool: | ||
| 169 | return code; | ||
| 170 | default: | ||
| 171 | UNREACHABLE_MSG("Incompatible types"); | ||
| 172 | return code; | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | std::string AsBool2() const { | ||
| 177 | switch (type) { | ||
| 178 | case Type::Bool2: | ||
| 179 | return code; | ||
| 180 | default: | ||
| 181 | UNREACHABLE_MSG("Incompatible types"); | ||
| 182 | return code; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
| 186 | std::string AsFloat() const { | ||
| 187 | switch (type) { | ||
| 188 | case Type::Float: | ||
| 189 | return code; | ||
| 190 | case Type::Uint: | ||
| 191 | return fmt::format("utof({})", code); | ||
| 192 | case Type::Int: | ||
| 193 | return fmt::format("itof({})", code); | ||
| 194 | case Type::HalfFloat: | ||
| 195 | return fmt::format("utof(packHalf2x16({}))", code); | ||
| 196 | default: | ||
| 197 | UNREACHABLE_MSG("Incompatible types"); | ||
| 198 | return code; | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | std::string AsInt() const { | ||
| 203 | switch (type) { | ||
| 204 | case Type::Float: | ||
| 205 | return fmt::format("ftoi({})", code); | ||
| 206 | case Type::Uint: | ||
| 207 | return fmt::format("int({})", code); | ||
| 208 | case Type::Int: | ||
| 209 | return code; | ||
| 210 | case Type::HalfFloat: | ||
| 211 | return fmt::format("int(packHalf2x16({}))", code); | ||
| 212 | default: | ||
| 213 | UNREACHABLE_MSG("Incompatible types"); | ||
| 214 | return code; | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | std::string AsUint() const { | ||
| 219 | switch (type) { | ||
| 220 | case Type::Float: | ||
| 221 | return fmt::format("ftou({})", code); | ||
| 222 | case Type::Uint: | ||
| 223 | return code; | ||
| 224 | case Type::Int: | ||
| 225 | return fmt::format("uint({})", code); | ||
| 226 | case Type::HalfFloat: | ||
| 227 | return fmt::format("packHalf2x16({})", code); | ||
| 228 | default: | ||
| 229 | UNREACHABLE_MSG("Incompatible types"); | ||
| 230 | return code; | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | std::string AsHalfFloat() const { | ||
| 235 | switch (type) { | ||
| 236 | case Type::Float: | ||
| 237 | return fmt::format("unpackHalf2x16(ftou({}))", code); | ||
| 238 | case Type::Uint: | ||
| 239 | return fmt::format("unpackHalf2x16({})", code); | ||
| 240 | case Type::Int: | ||
| 241 | return fmt::format("unpackHalf2x16(int({}))", code); | ||
| 242 | case Type::HalfFloat: | ||
| 243 | return code; | ||
| 244 | default: | ||
| 245 | UNREACHABLE_MSG("Incompatible types"); | ||
| 246 | return code; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | private: | ||
| 251 | std::string code; | ||
| 252 | Type type{}; | ||
| 253 | }; | ||
| 254 | |||
| 255 | const char* GetTypeString(Type type) { | ||
| 256 | switch (type) { | ||
| 257 | case Type::Bool: | ||
| 258 | return "bool"; | ||
| 259 | case Type::Bool2: | ||
| 260 | return "bvec2"; | ||
| 261 | case Type::Float: | ||
| 262 | return "float"; | ||
| 263 | case Type::Int: | ||
| 264 | return "int"; | ||
| 265 | case Type::Uint: | ||
| 266 | return "uint"; | ||
| 267 | case Type::HalfFloat: | ||
| 268 | return "vec2"; | ||
| 269 | default: | ||
| 270 | UNREACHABLE_MSG("Invalid type"); | ||
| 271 | return "<invalid type>"; | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | ||
| 276 | switch (image_type) { | ||
| 277 | case Tegra::Shader::ImageType::Texture1D: | ||
| 278 | return "1D"; | ||
| 279 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 280 | return "Buffer"; | ||
| 281 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 282 | return "1DArray"; | ||
| 283 | case Tegra::Shader::ImageType::Texture2D: | ||
| 284 | return "2D"; | ||
| 285 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 286 | return "2DArray"; | ||
| 287 | case Tegra::Shader::ImageType::Texture3D: | ||
| 288 | return "3D"; | ||
| 289 | default: | ||
| 290 | UNREACHABLE(); | ||
| 291 | return "1D"; | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | /// Describes primitive behavior on geometry shaders | ||
| 296 | std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { | ||
| 297 | switch (topology) { | ||
| 298 | case Maxwell::PrimitiveTopology::Points: | ||
| 299 | return {"points", 1}; | ||
| 300 | case Maxwell::PrimitiveTopology::Lines: | ||
| 301 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 302 | return {"lines", 2}; | ||
| 303 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 304 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 305 | return {"lines_adjacency", 4}; | ||
| 306 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 307 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 308 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 309 | return {"triangles", 3}; | ||
| 310 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 311 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 312 | return {"triangles_adjacency", 6}; | ||
| 313 | default: | ||
| 314 | UNIMPLEMENTED_MSG("topology={}", topology); | ||
| 315 | return {"points", 1}; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | |||
| 319 | /// Generates code to use for a swizzle operation. | ||
| 320 | constexpr const char* GetSwizzle(std::size_t element) { | ||
| 321 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; | ||
| 322 | return swizzle.at(element); | ||
| 323 | } | ||
| 324 | |||
| 325 | constexpr const char* GetColorSwizzle(std::size_t element) { | ||
| 326 | constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; | ||
| 327 | return swizzle.at(element); | ||
| 328 | } | ||
| 329 | |||
| 330 | /// Translate topology | ||
| 331 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 332 | switch (topology) { | ||
| 333 | case Tegra::Shader::OutputTopology::PointList: | ||
| 334 | return "points"; | ||
| 335 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 336 | return "line_strip"; | ||
| 337 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 338 | return "triangle_strip"; | ||
| 339 | default: | ||
| 340 | UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); | ||
| 341 | return "points"; | ||
| 342 | } | ||
| 343 | } | ||
| 344 | |||
| 345 | /// Returns true if an object has to be treated as precise | ||
| 346 | bool IsPrecise(Operation operand) { | ||
| 347 | const auto& meta{operand.GetMeta()}; | ||
| 348 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { | ||
| 349 | return arithmetic->precise; | ||
| 350 | } | ||
| 351 | return false; | ||
| 352 | } | ||
| 353 | |||
| 354 | bool IsPrecise(const Node& node) { | ||
| 355 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 356 | return IsPrecise(*operation); | ||
| 357 | } | ||
| 358 | return false; | ||
| 359 | } | ||
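Both IsPrecise overloads peel a std::variant with std::get_if and fall back to false. The same pattern in isolation, with a hypothetical two-alternative meta type:

#include <variant>

struct MetaArithmetic {
    bool precise = false;
};
using Meta = std::variant<std::monostate, MetaArithmetic>;

bool IsPreciseMeta(const Meta& meta) {
    if (const auto* arithmetic = std::get_if<MetaArithmetic>(&meta)) {
        return arithmetic->precise;
    }
    return false; // any other alternative is never precise
}

int main() {
    return IsPreciseMeta(Meta{MetaArithmetic{true}}) ? 0 : 1;
}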
| 360 | |||
| 361 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 362 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 363 | } | ||
| 364 | |||
| 365 | constexpr bool IsLegacyTexCoord(Attribute::Index index) { | ||
| 366 | return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) && | ||
| 367 | static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7); | ||
| 368 | } | ||
| 369 | |||
| 370 | constexpr Attribute::Index ToGenericAttribute(u64 value) { | ||
| 371 | return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); | ||
| 372 | } | ||
| 373 | |||
| 374 | constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { | ||
| 375 | return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0); | ||
| 376 | } | ||
| 377 | |||
| 378 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 379 | ASSERT(IsGenericAttribute(index)); | ||
| 380 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 381 | } | ||
| 382 | |||
| 383 | constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { | ||
| 384 | switch (stack) { | ||
| 385 | case MetaStackClass::Ssy: | ||
| 386 | return "ssy"; | ||
| 387 | case MetaStackClass::Pbk: | ||
| 388 | return "pbk"; | ||
| 389 | } | ||
| 390 | return {}; | ||
| 391 | } | ||
| 392 | |||
| 393 | std::string FlowStackName(MetaStackClass stack) { | ||
| 394 | return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); | ||
| 395 | } | ||
| 396 | |||
| 397 | std::string FlowStackTopName(MetaStackClass stack) { | ||
| 398 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | ||
| 399 | } | ||
| 400 | |||
| 401 | struct GenericVaryingDescription { | ||
| 402 | std::string name; | ||
| 403 | u8 first_element = 0; | ||
| 404 | bool is_scalar = false; | ||
| 405 | }; | ||
| 406 | |||
| 407 | class GLSLDecompiler final { | ||
| 408 | public: | ||
| 409 | explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | ||
| 410 | ShaderType stage_, std::string_view identifier_, | ||
| 411 | std::string_view suffix_) | ||
| 412 | : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, | ||
| 413 | identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { | ||
| 414 | if (stage != ShaderType::Compute) { | ||
| 415 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 416 | } | ||
| 417 | } | ||
| 418 | |||
| 419 | void Decompile() { | ||
| 420 | DeclareHeader(); | ||
| 421 | DeclareVertex(); | ||
| 422 | DeclareGeometry(); | ||
| 423 | DeclareFragment(); | ||
| 424 | DeclareCompute(); | ||
| 425 | DeclareInputAttributes(); | ||
| 426 | DeclareOutputAttributes(); | ||
| 427 | DeclareImages(); | ||
| 428 | DeclareSamplers(); | ||
| 429 | DeclareGlobalMemory(); | ||
| 430 | DeclareConstantBuffers(); | ||
| 431 | DeclareLocalMemory(); | ||
| 432 | DeclareRegisters(); | ||
| 433 | DeclarePredicates(); | ||
| 434 | DeclareInternalFlags(); | ||
| 435 | DeclareCustomVariables(); | ||
| 436 | DeclarePhysicalAttributeReader(); | ||
| 437 | |||
| 438 | code.AddLine("void main() {{"); | ||
| 439 | ++code.scope; | ||
| 440 | |||
| 441 | if (stage == ShaderType::Vertex) { | ||
| 442 | code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); | ||
| 443 | } | ||
| 444 | |||
| 445 | if (ir.IsDecompiled()) { | ||
| 446 | DecompileAST(); | ||
| 447 | } else { | ||
| 448 | DecompileBranchMode(); | ||
| 449 | } | ||
| 450 | |||
| 451 | --code.scope; | ||
| 452 | code.AddLine("}}"); | ||
| 453 | } | ||
| 454 | |||
| 455 | std::string GetResult() { | ||
| 456 | return code.GetResult(); | ||
| 457 | } | ||
| 458 | |||
| 459 | private: | ||
| 460 | friend class ASTDecompiler; | ||
| 461 | friend class ExprDecompiler; | ||
| 462 | |||
| 463 | void DecompileBranchMode() { | ||
| 464 | // VM's program counter | ||
| 465 | const auto first_address = ir.GetBasicBlocks().begin()->first; | ||
| 466 | code.AddLine("uint jmp_to = {}U;", first_address); | ||
| 467 | |||
| 468 | // TODO(Subv): Figure out the actual depth of the flow stack; for now it seems | ||
| 469 | // unlikely that shaders will use 20 nested SSYs and PBKs. | ||
| 470 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 471 | if (!ir.IsFlowStackDisabled()) { | ||
| 472 | for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | ||
| 473 | code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | ||
| 474 | code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); | ||
| 475 | } | ||
| 476 | } | ||
| 477 | |||
| 478 | code.AddLine("while (true) {{"); | ||
| 479 | ++code.scope; | ||
| 480 | |||
| 481 | code.AddLine("switch (jmp_to) {{"); | ||
| 482 | |||
| 483 | for (const auto& pair : ir.GetBasicBlocks()) { | ||
| 484 | const auto& [address, bb] = pair; | ||
| 485 | code.AddLine("case 0x{:X}U: {{", address); | ||
| 486 | ++code.scope; | ||
| 487 | |||
| 488 | VisitBlock(bb); | ||
| 489 | |||
| 490 | --code.scope; | ||
| 491 | code.AddLine("}}"); | ||
| 492 | } | ||
| 493 | |||
| 494 | code.AddLine("default: return;"); | ||
| 495 | code.AddLine("}}"); | ||
| 496 | |||
| 497 | --code.scope; | ||
| 498 | code.AddLine("}}"); | ||
| 499 | } | ||
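For reference, branch mode emits GLSL of roughly this shape: a jmp_to program counter, fixed-size SSY/PBK stacks, and one switch case per basic block inside an endless loop. The constant below is a hand-written illustration of that skeleton, not actual decompiler output:

#include <cstdio>

constexpr const char* BRANCH_MODE_SKELETON = R"(uint jmp_to = 0xA0U;
uint ssy_flow_stack[20]; uint ssy_flow_stack_top = 0U;
uint pbk_flow_stack[20]; uint pbk_flow_stack_top = 0U;
while (true) {
    switch (jmp_to) {
    case 0xA0U: {
        // basic block body; assigns jmp_to and breaks to "jump"
        break;
    }
    default: return;
    }
}
)";

int main() {
    std::puts(BRANCH_MODE_SKELETON);
}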
| 500 | |||
| 501 | void DecompileAST(); | ||
| 502 | |||
| 503 | void DeclareHeader() { | ||
| 504 | if (!identifier.empty()) { | ||
| 505 | code.AddLine("// {}", identifier); | ||
| 506 | } | ||
| 507 | const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); | ||
| 508 | code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core"); | ||
| 509 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); | ||
| 510 | if (device.HasShaderBallot()) { | ||
| 511 | code.AddLine("#extension GL_ARB_shader_ballot : require"); | ||
| 512 | } | ||
| 513 | if (device.HasVertexViewportLayer()) { | ||
| 514 | code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); | ||
| 515 | } | ||
| 516 | if (device.HasImageLoadFormatted()) { | ||
| 517 | code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); | ||
| 518 | } | ||
| 519 | if (device.HasTextureShadowLod()) { | ||
| 520 | code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); | ||
| 521 | } | ||
| 522 | if (device.HasWarpIntrinsics()) { | ||
| 523 | code.AddLine("#extension GL_NV_gpu_shader5 : require"); | ||
| 524 | code.AddLine("#extension GL_NV_shader_thread_group : require"); | ||
| 525 | code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); | ||
| 526 | } | ||
| 527 | // This pragma stops Nvidia's driver from over-optimizing math (probably using fp16 | ||
| 528 | // operations) in places where we don't want it to. | ||
| 529 | // Thanks to Ryujinx for finding this workaround. | ||
| 530 | code.AddLine("#pragma optionNV(fastmath off)"); | ||
| 531 | |||
| 532 | code.AddNewLine(); | ||
| 533 | |||
| 534 | code.AddLine(COMMON_DECLARATIONS); | ||
| 535 | } | ||
| 536 | |||
| 537 | void DeclareVertex() { | ||
| 538 | if (stage != ShaderType::Vertex) { | ||
| 539 | return; | ||
| 540 | } | ||
| 541 | |||
| 542 | DeclareVertexRedeclarations(); | ||
| 543 | } | ||
| 544 | |||
| 545 | void DeclareGeometry() { | ||
| 546 | if (stage != ShaderType::Geometry) { | ||
| 547 | return; | ||
| 548 | } | ||
| 549 | |||
| 550 | const auto& info = registry.GetGraphicsInfo(); | ||
| 551 | const auto input_topology = info.primitive_topology; | ||
| 552 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); | ||
| 553 | max_input_vertices = max_vertices; | ||
| 554 | code.AddLine("layout ({}) in;", glsl_topology); | ||
| 555 | |||
| 556 | const auto topology = GetTopologyName(header.common3.output_topology); | ||
| 557 | const auto max_output_vertices = header.common4.max_output_vertices.Value(); | ||
| 558 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); | ||
| 559 | code.AddNewLine(); | ||
| 560 | |||
| 561 | code.AddLine("in gl_PerVertex {{"); | ||
| 562 | ++code.scope; | ||
| 563 | code.AddLine("vec4 gl_Position;"); | ||
| 564 | --code.scope; | ||
| 565 | code.AddLine("}} gl_in[];"); | ||
| 566 | |||
| 567 | DeclareVertexRedeclarations(); | ||
| 568 | } | ||
| 569 | |||
| 570 | void DeclareFragment() { | ||
| 571 | if (stage != ShaderType::Fragment) { | ||
| 572 | return; | ||
| 573 | } | ||
| 574 | if (ir.UsesLegacyVaryings()) { | ||
| 575 | code.AddLine("in gl_PerFragment {{"); | ||
| 576 | ++code.scope; | ||
| 577 | code.AddLine("vec4 gl_TexCoord[8];"); | ||
| 578 | code.AddLine("vec4 gl_Color;"); | ||
| 579 | code.AddLine("vec4 gl_SecondaryColor;"); | ||
| 580 | --code.scope; | ||
| 581 | code.AddLine("}};"); | ||
| 582 | } | ||
| 583 | |||
| 584 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 585 | code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); | ||
| 586 | } | ||
| 587 | } | ||
| 588 | |||
| 589 | void DeclareCompute() { | ||
| 590 | if (stage != ShaderType::Compute) { | ||
| 591 | return; | ||
| 592 | } | ||
| 593 | const auto& info = registry.GetComputeInfo(); | ||
| 594 | if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { | ||
| 595 | const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||
| 596 | if (size > limit) { | ||
| 597 | LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", | ||
| 598 | size, limit); | ||
| 599 | size = limit; | ||
| 600 | } | ||
| 601 | |||
| 602 | code.AddLine("shared uint smem[{}];", size / 4); | ||
| 603 | code.AddNewLine(); | ||
| 604 | } | ||
| 605 | code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | ||
| 606 | info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); | ||
| 607 | code.AddNewLine(); | ||
| 608 | } | ||
| 609 | |||
| 610 | void DeclareVertexRedeclarations() { | ||
| 611 | code.AddLine("out gl_PerVertex {{"); | ||
| 612 | ++code.scope; | ||
| 613 | |||
| 614 | auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); | ||
| 615 | if (!pos_xfb.empty()) { | ||
| 616 | pos_xfb = fmt::format("layout ({}) ", pos_xfb); | ||
| 617 | } | ||
| 618 | const char* pos_type = | ||
| 619 | FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); | ||
| 620 | code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); | ||
| 621 | |||
| 622 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 623 | if (attribute == Attribute::Index::ClipDistances0123 || | ||
| 624 | attribute == Attribute::Index::ClipDistances4567) { | ||
| 625 | code.AddLine("float gl_ClipDistance[];"); | ||
| 626 | break; | ||
| 627 | } | ||
| 628 | } | ||
| 629 | |||
| 630 | if (stage != ShaderType::Geometry && | ||
| 631 | (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { | ||
| 632 | if (ir.UsesLayer()) { | ||
| 633 | code.AddLine("int gl_Layer;"); | ||
| 634 | } | ||
| 635 | if (ir.UsesViewportIndex()) { | ||
| 636 | code.AddLine("int gl_ViewportIndex;"); | ||
| 637 | } | ||
| 638 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && | ||
| 639 | !device.HasVertexViewportLayer()) { | ||
| 640 | LOG_ERROR( | ||
| 641 | Render_OpenGL, | ||
| 642 | "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); | ||
| 643 | } | ||
| 644 | |||
| 645 | if (ir.UsesPointSize()) { | ||
| 646 | code.AddLine("float gl_PointSize;"); | ||
| 647 | } | ||
| 648 | |||
| 649 | if (ir.UsesLegacyVaryings()) { | ||
| 650 | code.AddLine("vec4 gl_TexCoord[8];"); | ||
| 651 | code.AddLine("vec4 gl_FrontColor;"); | ||
| 652 | code.AddLine("vec4 gl_FrontSecondaryColor;"); | ||
| 653 | code.AddLine("vec4 gl_BackColor;"); | ||
| 654 | code.AddLine("vec4 gl_BackSecondaryColor;"); | ||
| 655 | } | ||
| 656 | |||
| 657 | --code.scope; | ||
| 658 | code.AddLine("}};"); | ||
| 659 | code.AddNewLine(); | ||
| 660 | |||
| 661 | if (stage == ShaderType::Geometry) { | ||
| 662 | if (ir.UsesLayer()) { | ||
| 663 | code.AddLine("out int gl_Layer;"); | ||
| 664 | } | ||
| 665 | if (ir.UsesViewportIndex()) { | ||
| 666 | code.AddLine("out int gl_ViewportIndex;"); | ||
| 667 | } | ||
| 668 | } | ||
| 669 | code.AddNewLine(); | ||
| 670 | } | ||
| 671 | |||
| 672 | void DeclareRegisters() { | ||
| 673 | const auto& registers = ir.GetRegisters(); | ||
| 674 | for (const u32 gpr : registers) { | ||
| 675 | code.AddLine("float {} = 0.0f;", GetRegister(gpr)); | ||
| 676 | } | ||
| 677 | if (!registers.empty()) { | ||
| 678 | code.AddNewLine(); | ||
| 679 | } | ||
| 680 | } | ||
| 681 | |||
| 682 | void DeclareCustomVariables() { | ||
| 683 | const u32 num_custom_variables = ir.GetNumCustomVariables(); | ||
| 684 | for (u32 i = 0; i < num_custom_variables; ++i) { | ||
| 685 | code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); | ||
| 686 | } | ||
| 687 | if (num_custom_variables > 0) { | ||
| 688 | code.AddNewLine(); | ||
| 689 | } | ||
| 690 | } | ||
| 691 | |||
| 692 | void DeclarePredicates() { | ||
| 693 | const auto& predicates = ir.GetPredicates(); | ||
| 694 | for (const auto pred : predicates) { | ||
| 695 | code.AddLine("bool {} = false;", GetPredicate(pred)); | ||
| 696 | } | ||
| 697 | if (!predicates.empty()) { | ||
| 698 | code.AddNewLine(); | ||
| 699 | } | ||
| 700 | } | ||
| 701 | |||
| 702 | void DeclareLocalMemory() { | ||
| 703 | u64 local_memory_size = 0; | ||
| 704 | if (stage == ShaderType::Compute) { | ||
| 705 | local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 706 | } else { | ||
| 707 | local_memory_size = header.GetLocalMemorySize(); | ||
| 708 | } | ||
| 709 | if (local_memory_size == 0) { | ||
| 710 | return; | ||
| 711 | } | ||
| 712 | const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||
| 713 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); | ||
| 714 | code.AddNewLine(); | ||
| 715 | } | ||
| 716 | |||
| 717 | void DeclareInternalFlags() { | ||
| 718 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { | ||
| 719 | const auto flag_code = static_cast<InternalFlag>(flag); | ||
| 720 | code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); | ||
| 721 | } | ||
| 722 | code.AddNewLine(); | ||
| 723 | } | ||
| 724 | |||
| 725 | const char* GetInputFlags(PixelImap attribute) { | ||
| 726 | switch (attribute) { | ||
| 727 | case PixelImap::Perspective: | ||
| 728 | return "smooth"; | ||
| 729 | case PixelImap::Constant: | ||
| 730 | return "flat"; | ||
| 731 | case PixelImap::ScreenLinear: | ||
| 732 | return "noperspective"; | ||
| 733 | case PixelImap::Unused: | ||
| 734 | break; | ||
| 735 | } | ||
| 736 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); | ||
| 737 | return {}; | ||
| 738 | } | ||
| 739 | |||
| 740 | void DeclareInputAttributes() { | ||
| 741 | if (ir.HasPhysicalAttributes()) { | ||
| 742 | const u32 num_inputs{GetNumPhysicalInputAttributes()}; | ||
| 743 | for (u32 i = 0; i < num_inputs; ++i) { | ||
| 744 | DeclareInputAttribute(ToGenericAttribute(i), true); | ||
| 745 | } | ||
| 746 | code.AddNewLine(); | ||
| 747 | return; | ||
| 748 | } | ||
| 749 | |||
| 750 | const auto& attributes = ir.GetInputAttributes(); | ||
| 751 | for (const auto index : attributes) { | ||
| 752 | if (IsGenericAttribute(index)) { | ||
| 753 | DeclareInputAttribute(index, false); | ||
| 754 | } | ||
| 755 | } | ||
| 756 | if (!attributes.empty()) { | ||
| 757 | code.AddNewLine(); | ||
| 758 | } | ||
| 759 | } | ||
| 760 | |||
| 761 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { | ||
| 762 | const u32 location{GetGenericAttributeIndex(index)}; | ||
| 763 | |||
| 764 | std::string name{GetGenericInputAttribute(index)}; | ||
| 765 | if (stage == ShaderType::Geometry) { | ||
| 766 | name = "gs_" + name + "[]"; | ||
| 767 | } | ||
| 768 | |||
| 769 | std::string suffix_; | ||
| 770 | if (stage == ShaderType::Fragment) { | ||
| 771 | const auto input_mode{header.ps.GetPixelImap(location)}; | ||
| 772 | if (input_mode == PixelImap::Unused) { | ||
| 773 | return; | ||
| 774 | } | ||
| 775 | suffix_ = GetInputFlags(input_mode); | ||
| 776 | } | ||
| 777 | |||
| 778 | code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); | ||
| 779 | } | ||
| 780 | |||
| 781 | void DeclareOutputAttributes() { | ||
| 782 | if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { | ||
| 783 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | ||
| 784 | DeclareOutputAttribute(ToGenericAttribute(i)); | ||
| 785 | } | ||
| 786 | code.AddNewLine(); | ||
| 787 | return; | ||
| 788 | } | ||
| 789 | |||
| 790 | const auto& attributes = ir.GetOutputAttributes(); | ||
| 791 | for (const auto index : attributes) { | ||
| 792 | if (IsGenericAttribute(index)) { | ||
| 793 | DeclareOutputAttribute(index); | ||
| 794 | } | ||
| 795 | } | ||
| 796 | if (!attributes.empty()) { | ||
| 797 | code.AddNewLine(); | ||
| 798 | } | ||
| 799 | } | ||
| 800 | |||
| 801 | std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { | ||
| 802 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 803 | const auto it = transform_feedback.find(location); | ||
| 804 | if (it == transform_feedback.end()) { | ||
| 805 | return std::nullopt; | ||
| 806 | } | ||
| 807 | return it->second.components; | ||
| 808 | } | ||
| 809 | |||
| 810 | std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { | ||
| 811 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 812 | const auto it = transform_feedback.find(location); | ||
| 813 | if (it == transform_feedback.end()) { | ||
| 814 | return {}; | ||
| 815 | } | ||
| 816 | |||
| 817 | const VaryingTFB& tfb = it->second; | ||
| 818 | return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, | ||
| 819 | tfb.offset, tfb.stride); | ||
| 820 | } | ||
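Both transform-feedback lookups key the map by a flat location: four components per attribute, so location = attribute_index * 4 + element. A sketch of that packing (the function name is made up):

#include <cstdint>

constexpr std::uint8_t FlatLocation(std::uint32_t attribute_index, std::uint8_t element) {
    return static_cast<std::uint8_t>(attribute_index * 4 + element);
}

static_assert(FlatLocation(0, 0) == 0);
static_assert(FlatLocation(1, 2) == 6); // second attribute, .z component

int main() {}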
| 821 | |||
| 822 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 823 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 824 | u8 element = 0; | ||
| 825 | while (element < 4) { | ||
| 826 | auto xfb = GetTransformFeedbackDecoration(index, element); | ||
| 827 | if (!xfb.empty()) { | ||
| 828 | xfb = fmt::format(", {}", xfb); | ||
| 829 | } | ||
| 830 | const std::size_t remainder = 4 - element; | ||
| 831 | const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); | ||
| 832 | const char* const type = FLOAT_TYPES.at(num_components - 1); | ||
| 833 | |||
| 834 | const u32 location = GetGenericAttributeIndex(index); | ||
| 835 | |||
| 836 | GenericVaryingDescription description; | ||
| 837 | description.first_element = static_cast<u8>(element); | ||
| 838 | description.is_scalar = num_components == 1; | ||
| 839 | description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); | ||
| 840 | if (element != 0 || num_components != 4) { | ||
| 841 | const std::string_view name_swizzle = swizzle.substr(element, num_components); | ||
| 842 | description.name = fmt::format("{}_{}", description.name, name_swizzle); | ||
| 843 | } | ||
| 844 | for (std::size_t i = 0; i < num_components; ++i) { | ||
| 845 | const u8 offset = static_cast<u8>(location * 4 + element + i); | ||
| 846 | varying_description.insert({offset, description}); | ||
| 847 | } | ||
| 848 | |||
| 849 | code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, | ||
| 850 | xfb, type, description.name); | ||
| 851 | |||
| 852 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 853 | } | ||
| 854 | } | ||
| 855 | |||
| 856 | void DeclareConstantBuffers() { | ||
| 857 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; | ||
| 858 | for (const auto& [index, info] : ir.GetConstantBuffers()) { | ||
| 859 | const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); | ||
| 860 | const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; | ||
| 861 | code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, | ||
| 862 | GetConstBufferBlock(index)); | ||
| 863 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); | ||
| 864 | code.AddLine("}};"); | ||
| 865 | code.AddNewLine(); | ||
| 866 | } | ||
| 867 | } | ||
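| | // Illustrative only (block and array names are hypothetical; they come from | ||
| | // GetConstBufferBlock/GetConstBuffer): a 64-byte constant buffer at index 2 bound at | ||
| | // binding 0 would emit roughly: | ||
| | //     layout (std140, binding = 0) uniform cbuf_block_2 { | ||
| | //         uvec4 cbuf2[4]; | ||
| | //     }; | ||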
| 868 | |||
| 869 | void DeclareGlobalMemory() { | ||
| 870 | u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 871 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 872 | // Since we don't know how the shader will use this memory, hint the driver to disable as | ||
| 873 | // many optimizations as possible | ||
| 874 | std::string qualifier = "coherent volatile"; | ||
| 875 | if (usage.is_read && !usage.is_written) { | ||
| 876 | qualifier += " readonly"; | ||
| 877 | } else if (usage.is_written && !usage.is_read) { | ||
| 878 | qualifier += " writeonly"; | ||
| 879 | } | ||
| 880 | |||
| 881 | code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, | ||
| 882 | GetGlobalMemoryBlock(base)); | ||
| 883 | code.AddLine(" uint {}[];", GetGlobalMemory(base)); | ||
| 884 | code.AddLine("}};"); | ||
| 885 | code.AddNewLine(); | ||
| 886 | } | ||
| 887 | } | ||
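| | // Illustrative only (names are hypothetical; they come from GetGlobalMemoryBlock and | ||
| | // GetGlobalMemory): a read-only global memory region would emit roughly: | ||
| | //     layout (std430, binding = 0) coherent volatile readonly buffer gmem_block_0_0 { | ||
| | //         uint gmem_0_0[]; | ||
| | //     }; | ||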
| 888 | |||
| 889 | void DeclareSamplers() { | ||
| 890 | u32 binding = device.GetBaseBindings(stage).sampler; | ||
| 891 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 892 | const std::string name = GetSampler(sampler); | ||
| 893 | const std::string description = fmt::format("layout (binding = {}) uniform", binding); | ||
| 894 | binding += sampler.is_indexed ? sampler.size : 1; | ||
| 895 | |||
| 896 | std::string sampler_type = [&]() { | ||
| 897 | if (sampler.is_buffer) { | ||
| 898 | return "samplerBuffer"; | ||
| 899 | } | ||
| 900 | switch (sampler.type) { | ||
| 901 | case TextureType::Texture1D: | ||
| 902 | return "sampler1D"; | ||
| 903 | case TextureType::Texture2D: | ||
| 904 | return "sampler2D"; | ||
| 905 | case TextureType::Texture3D: | ||
| 906 | return "sampler3D"; | ||
| 907 | case TextureType::TextureCube: | ||
| 908 | return "samplerCube"; | ||
| 909 | default: | ||
| 910 | UNREACHABLE(); | ||
| 911 | return "sampler2D"; | ||
| 912 | } | ||
| 913 | }(); | ||
| 914 | if (sampler.is_array) { | ||
| 915 | sampler_type += "Array"; | ||
| 916 | } | ||
| 917 | if (sampler.is_shadow) { | ||
| 918 | sampler_type += "Shadow"; | ||
| 919 | } | ||
| 920 | |||
| 921 | if (!sampler.is_indexed) { | ||
| 922 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 923 | } else { | ||
| 924 | code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); | ||
| 925 | } | ||
| 926 | } | ||
| 927 | if (!ir.GetSamplers().empty()) { | ||
| 928 | code.AddNewLine(); | ||
| 929 | } | ||
| 930 | } | ||
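| | // Illustrative only (sampler names and bindings are hypothetical; names come from | ||
| | // GetSampler): a 2D array shadow sampler followed by an indexed sampler of size 4 would | ||
| | // emit roughly: | ||
| | //     layout (binding = 8) uniform sampler2DArrayShadow sampler_8; | ||
| | //     layout (binding = 9) uniform sampler2D sampler_9[4]; | ||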
| 931 | |||
| 932 | void DeclarePhysicalAttributeReader() { | ||
| 933 | if (!ir.HasPhysicalAttributes()) { | ||
| 934 | return; | ||
| 935 | } | ||
| 936 | code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); | ||
| 937 | ++code.scope; | ||
| 938 | code.AddLine("switch (physical_address) {{"); | ||
| 939 | |||
| 940 | // Just declare generic attributes for now. | ||
| 941 | const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())}; | ||
| 942 | for (u32 index = 0; index < num_attributes; ++index) { | ||
| 943 | const auto attribute{ToGenericAttribute(index)}; | ||
| 944 | for (u32 element = 0; element < 4; ++element) { | ||
| 945 | constexpr u32 generic_base = 0x80; | ||
| 946 | constexpr u32 generic_stride = 16; | ||
| 947 | constexpr u32 element_stride = 4; | ||
| 948 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | ||
| 949 | |||
| 950 | const bool declared = stage != ShaderType::Fragment || | ||
| 951 | header.ps.GetPixelImap(index) != PixelImap::Unused; | ||
| 952 | const std::string value = | ||
| 953 | declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; | ||
| 954 | code.AddLine("case 0x{:X}U: return {};", address, value); | ||
| 955 | } | ||
| 956 | } | ||
| 957 | |||
| 958 | code.AddLine("default: return 0;"); | ||
| 959 | |||
| 960 | code.AddLine("}}"); | ||
| 961 | --code.scope; | ||
| 962 | code.AddLine("}}"); | ||
| 963 | code.AddNewLine(); | ||
| 964 | } | ||
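| | // Illustrative only (attribute names such as in_attr0 are hypothetical): with two generic | ||
| | // input attributes, the generated reader looks roughly like: | ||
| | //     float ReadPhysicalAttribute(uint physical_address) { | ||
| | //         switch (physical_address) { | ||
| | //         case 0x80U: return in_attr0.x; | ||
| | //         case 0x84U: return in_attr0.y; | ||
| | //         ... | ||
| | //         case 0x90U: return in_attr1.x; | ||
| | //         ... | ||
| | //         default: return 0.0f; | ||
| | //         } | ||
| | //     } | ||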
| 965 | |||
| 966 | void DeclareImages() { | ||
| 967 | u32 binding = device.GetBaseBindings(stage).image; | ||
| 968 | for (const auto& image : ir.GetImages()) { | ||
| 969 | std::string qualifier = "coherent volatile"; | ||
| 970 | if (image.is_read && !image.is_written) { | ||
| 971 | qualifier += " readonly"; | ||
| 972 | } else if (image.is_written && !image.is_read) { | ||
| 973 | qualifier += " writeonly"; | ||
| 974 | } | ||
| 975 | |||
| 976 | const char* format = image.is_atomic ? "r32ui, " : ""; | ||
| 977 | const char* type_declaration = GetImageTypeDeclaration(image.type); | ||
| 978 | code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, | ||
| 979 | qualifier, type_declaration, GetImage(image)); | ||
| 980 | } | ||
| 981 | if (!ir.GetImages().empty()) { | ||
| 982 | code.AddNewLine(); | ||
| 983 | } | ||
| 984 | } | ||
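| | // Illustrative only (the image name comes from GetImage): an atomic read-write 2D image | ||
| | // would emit roughly: | ||
| | //     layout (r32ui, binding = 0) coherent volatile uniform uimage2D image_0; | ||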
| 985 | |||
| 986 | void VisitBlock(const NodeBlock& bb) { | ||
| 987 | for (const auto& node : bb) { | ||
| 988 | Visit(node).CheckVoid(); | ||
| 989 | } | ||
| 990 | } | ||
| 991 | |||
| 992 | Expression Visit(const Node& node) { | ||
| 993 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 994 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 995 | Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); | ||
| 996 | } | ||
| 997 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); | ||
| 998 | if (operation_index >= operation_decompilers.size()) { | ||
| 999 | UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); | ||
| 1000 | return {}; | ||
| 1001 | } | ||
| 1002 | const auto decompiler = operation_decompilers[operation_index]; | ||
| 1003 | if (decompiler == nullptr) { | ||
| 1004 | UNREACHABLE_MSG("Undefined operation: {}", operation_index); | ||
| 1005 | return {}; | ||
| 1006 | } | ||
| 1007 | return (this->*decompiler)(*operation); | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1011 | const u32 index = gpr->GetIndex(); | ||
| 1012 | if (index == Register::ZeroIndex) { | ||
| 1013 | return {"0U", Type::Uint}; | ||
| 1014 | } | ||
| 1015 | return {GetRegister(index), Type::Float}; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1019 | const u32 index = cv->GetIndex(); | ||
| 1020 | return {GetCustomVariable(index), Type::Float}; | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1024 | const u32 value = immediate->GetValue(); | ||
| 1025 | if (value < 10) { | ||
| 1026 | // For readability, avoid hex formatting for single-digit values | ||
| 1027 | return {fmt::format("{}U", value), Type::Uint}; | ||
| 1028 | } | ||
| 1029 | return {fmt::format("0x{:X}U", value), Type::Uint}; | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1033 | const auto value = [&]() -> std::string { | ||
| 1034 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1035 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1036 | return "true"; | ||
| 1037 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1038 | return "false"; | ||
| 1039 | default: | ||
| 1040 | return GetPredicate(index); | ||
| 1041 | } | ||
| 1042 | }(); | ||
| 1043 | if (predicate->IsNegated()) { | ||
| 1044 | return {fmt::format("!({})", value), Type::Bool}; | ||
| 1045 | } | ||
| 1046 | return {value, Type::Bool}; | ||
| 1047 | } | ||
| 1048 | |||
| 1049 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1050 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, | ||
| 1051 | "Physical attributes in geometry shaders are not implemented"); | ||
| 1052 | if (abuf->IsPhysicalBuffer()) { | ||
| 1053 | return {fmt::format("ReadPhysicalAttribute({})", | ||
| 1054 | Visit(abuf->GetPhysicalAddress()).AsUint()), | ||
| 1055 | Type::Float}; | ||
| 1056 | } | ||
| 1057 | return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1061 | const Node offset = cbuf->GetOffset(); | ||
| 1062 | |||
| 1063 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1064 | // Direct access | ||
| 1065 | const u32 offset_imm = immediate->GetValue(); | ||
| 1066 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); | ||
| 1067 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | ||
| 1068 | offset_imm / (4 * 4), (offset_imm / 4) % 4), | ||
| 1069 | Type::Uint}; | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | // Indirect access | ||
| 1073 | const std::string final_offset = code.GenerateTemporary(); | ||
| 1074 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); | ||
| 1075 | |||
| 1076 | if (!device.HasComponentIndexingBug()) { | ||
| 1077 | return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), | ||
| 1078 | final_offset, final_offset), | ||
| 1079 | Type::Uint}; | ||
| 1080 | } | ||
| 1081 | |||
| 1082 | // AMD's proprietary GLSL compiler emits ill-formed code for variable component access. | ||
| 1083 | // To work around this driver bug, generate four ifs, one per component. | ||
| 1084 | const std::string pack = code.GenerateTemporary(); | ||
| 1085 | code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), | ||
| 1086 | final_offset); | ||
| 1087 | |||
| 1088 | const std::string result = code.GenerateTemporary(); | ||
| 1089 | code.AddLine("uint {};", result); | ||
| 1090 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 1091 | code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, | ||
| 1092 | GetSwizzle(swizzle)); | ||
| 1093 | } | ||
| 1094 | return {result, Type::Uint}; | ||
| 1095 | } | ||
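| | // Illustrative only: on affected AMD drivers the indirect case above expands to GLSL like | ||
| | // the following (tmp names are generated temporaries, cbuf2 is hypothetical): | ||
| | //     uint tmp1 = dynamic_offset >> 2; | ||
| | //     uvec4 tmp2 = cbuf2[tmp1 >> 2]; | ||
| | //     uint tmp3; | ||
| | //     if ((tmp1 & 3) == 0) tmp3 = tmp2.x; | ||
| | //     if ((tmp1 & 3) == 1) tmp3 = tmp2.y; | ||
| | //     if ((tmp1 & 3) == 2) tmp3 = tmp2.z; | ||
| | //     if ((tmp1 & 3) == 3) tmp3 = tmp2.w; | ||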
| 1096 | |||
| 1097 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1098 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); | ||
| 1099 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); | ||
| 1100 | const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); | ||
| 1101 | return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), | ||
| 1102 | Type::Uint}; | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1106 | return { | ||
| 1107 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), | ||
| 1108 | Type::Uint}; | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1112 | return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1116 | return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1120 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1121 | Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); | ||
| 1122 | } | ||
| 1123 | // It's invalid to call a conditional on nested nodes; use an operation instead | ||
| 1124 | code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); | ||
| 1125 | ++code.scope; | ||
| 1126 | |||
| 1127 | VisitBlock(conditional->GetCode()); | ||
| 1128 | |||
| 1129 | --code.scope; | ||
| 1130 | code.AddLine("}}"); | ||
| 1131 | return {}; | ||
| 1132 | } | ||
| 1133 | |||
| 1134 | if (const auto comment = std::get_if<CommentNode>(&*node)) { | ||
| 1135 | code.AddLine("// " + comment->GetText()); | ||
| 1136 | return {}; | ||
| 1137 | } | ||
| 1138 | |||
| 1139 | UNREACHABLE(); | ||
| 1140 | return {}; | ||
| 1141 | } | ||
| 1142 | |||
| 1143 | Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | ||
| 1144 | const auto GeometryPass = [&](std::string_view name) { | ||
| 1145 | if (stage == ShaderType::Geometry && buffer) { | ||
| 1146 | // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games | ||
| 1147 | // set a 0x80000000 index for those and the shader fails to build. Find out why | ||
| 1148 | // this happens and what its intent is. | ||
| 1149 | return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), | ||
| 1150 | max_input_vertices.value()); | ||
| 1151 | } | ||
| 1152 | return std::string(name); | ||
| 1153 | }; | ||
| 1154 | |||
| 1155 | switch (attribute) { | ||
| 1156 | case Attribute::Index::Position: | ||
| 1157 | switch (stage) { | ||
| 1158 | case ShaderType::Geometry: | ||
| 1159 | return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), | ||
| 1160 | GetSwizzle(element)), | ||
| 1161 | Type::Float}; | ||
| 1162 | case ShaderType::Fragment: | ||
| 1163 | return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; | ||
| 1164 | default: | ||
| 1165 | UNREACHABLE(); | ||
| 1166 | return {"0", Type::Int}; | ||
| 1167 | } | ||
| 1168 | case Attribute::Index::FrontColor: | ||
| 1169 | return {"gl_Color"s + GetSwizzle(element), Type::Float}; | ||
| 1170 | case Attribute::Index::FrontSecondaryColor: | ||
| 1171 | return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; | ||
| 1172 | case Attribute::Index::PointCoord: | ||
| 1173 | switch (element) { | ||
| 1174 | case 0: | ||
| 1175 | return {"gl_PointCoord.x", Type::Float}; | ||
| 1176 | case 1: | ||
| 1177 | return {"gl_PointCoord.y", Type::Float}; | ||
| 1178 | case 2: | ||
| 1179 | case 3: | ||
| 1180 | return {"0.0f", Type::Float}; | ||
| 1181 | } | ||
| 1182 | UNREACHABLE(); | ||
| 1183 | return {"0", Type::Int}; | ||
| 1184 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1185 | // TODO(Subv): Find out what the values of the first two elements are when inside a | ||
| 1186 | // vertex shader, and what the value of the fourth element is when inside a Tess Eval | ||
| 1187 | // shader. | ||
| 1188 | ASSERT(stage == ShaderType::Vertex); | ||
| 1189 | switch (element) { | ||
| 1190 | case 2: | ||
| 1191 | // Config pack's first value is instance_id. | ||
| 1192 | return {"gl_InstanceID", Type::Int}; | ||
| 1193 | case 3: | ||
| 1194 | return {"gl_VertexID", Type::Int}; | ||
| 1195 | } | ||
| 1196 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 1197 | return {"0", Type::Int}; | ||
| 1198 | case Attribute::Index::FrontFacing: | ||
| 1199 | // TODO(Subv): Find out what the values are for the other elements. | ||
| 1200 | ASSERT(stage == ShaderType::Fragment); | ||
| 1201 | switch (element) { | ||
| 1202 | case 3: | ||
| 1203 | return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; | ||
| 1204 | } | ||
| 1205 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); | ||
| 1206 | return {"0", Type::Int}; | ||
| 1207 | default: | ||
| 1208 | if (IsGenericAttribute(attribute)) { | ||
| 1209 | return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), | ||
| 1210 | Type::Float}; | ||
| 1211 | } | ||
| 1212 | if (IsLegacyTexCoord(attribute)) { | ||
| 1213 | UNIMPLEMENTED_IF(stage == ShaderType::Geometry); | ||
| 1214 | return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), | ||
| 1215 | GetSwizzle(element)), | ||
| 1216 | Type::Float}; | ||
| 1217 | } | ||
| 1218 | break; | ||
| 1219 | } | ||
| 1220 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); | ||
| 1221 | return {"0", Type::Int}; | ||
| 1222 | } | ||
| 1223 | |||
| 1224 | Expression ApplyPrecise(Operation operation, std::string value, Type type) { | ||
| 1225 | if (!IsPrecise(operation)) { | ||
| 1226 | return {std::move(value), type}; | ||
| 1227 | } | ||
| 1228 | // Old Nvidia drivers have a bug combining precise with texture sampling. Texture sampling | ||
| 1229 | // is more likely to appear in fragment shaders, so we disable precise there. Some vertex | ||
| 1230 | // shaders also fail to build, but nobody seems to care about those. | ||
| 1231 | // Note: Only bugged drivers will skip precise. | ||
| 1232 | const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; | ||
| 1233 | |||
| 1234 | std::string temporary = code.GenerateTemporary(); | ||
| 1235 | code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), | ||
| 1236 | temporary, value); | ||
| 1237 | return {std::move(temporary), type}; | ||
| 1238 | } | ||
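| | // Illustrative only: on healthy drivers this wraps the value in a precise temporary, e.g. | ||
| | //     precise float tmp1 = (op_a * op_b) + op_c; | ||
| | // while fragment shaders on drivers with the precise bug get a plain "float tmp1 = ...;". | ||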
| 1239 | |||
| 1240 | Expression VisitOperand(Operation operation, std::size_t operand_index) { | ||
| 1241 | const auto& operand = operation[operand_index]; | ||
| 1242 | const bool parent_precise = IsPrecise(operation); | ||
| 1243 | const bool child_precise = IsPrecise(operand); | ||
| 1244 | const bool child_trivial = !std::holds_alternative<OperationNode>(*operand); | ||
| 1245 | if (!parent_precise || child_precise || child_trivial) { | ||
| 1246 | return Visit(operand); | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | Expression value = Visit(operand); | ||
| 1250 | std::string temporary = code.GenerateTemporary(); | ||
| 1251 | code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); | ||
| 1252 | return {std::move(temporary), value.GetType()}; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { | ||
| 1256 | const u32 element = abuf->GetElement(); | ||
| 1257 | switch (const auto attribute = abuf->GetIndex()) { | ||
| 1258 | case Attribute::Index::Position: | ||
| 1259 | return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; | ||
| 1260 | case Attribute::Index::LayerViewportPointSize: | ||
| 1261 | switch (element) { | ||
| 1262 | case 0: | ||
| 1263 | UNIMPLEMENTED(); | ||
| 1264 | return std::nullopt; | ||
| 1265 | case 1: | ||
| 1266 | if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { | ||
| 1267 | return std::nullopt; | ||
| 1268 | } | ||
| 1269 | return {{"gl_Layer", Type::Int}}; | ||
| 1270 | case 2: | ||
| 1271 | if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { | ||
| 1272 | return std::nullopt; | ||
| 1273 | } | ||
| 1274 | return {{"gl_ViewportIndex", Type::Int}}; | ||
| 1275 | case 3: | ||
| 1276 | return {{"gl_PointSize", Type::Float}}; | ||
| 1277 | } | ||
| 1278 | return std::nullopt; | ||
| 1279 | case Attribute::Index::FrontColor: | ||
| 1280 | return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1281 | case Attribute::Index::FrontSecondaryColor: | ||
| 1282 | return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1283 | case Attribute::Index::BackColor: | ||
| 1284 | return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1285 | case Attribute::Index::BackSecondaryColor: | ||
| 1286 | return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; | ||
| 1287 | case Attribute::Index::ClipDistances0123: | ||
| 1288 | return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; | ||
| 1289 | case Attribute::Index::ClipDistances4567: | ||
| 1290 | return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; | ||
| 1291 | default: | ||
| 1292 | if (IsGenericAttribute(attribute)) { | ||
| 1293 | return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; | ||
| 1294 | } | ||
| 1295 | if (IsLegacyTexCoord(attribute)) { | ||
| 1296 | return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), | ||
| 1297 | GetSwizzle(element)), | ||
| 1298 | Type::Float}}; | ||
| 1299 | } | ||
| 1300 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); | ||
| 1301 | return std::nullopt; | ||
| 1302 | } | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, | ||
| 1306 | Type type_a) { | ||
| 1307 | std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); | ||
| 1308 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, | ||
| 1312 | Type type_a, Type type_b) { | ||
| 1313 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1314 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1315 | std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); | ||
| 1316 | |||
| 1317 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, | ||
| 1321 | Type type_a, Type type_b) { | ||
| 1322 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1323 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1324 | std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); | ||
| 1325 | |||
| 1326 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, | ||
| 1330 | Type type_a, Type type_b, Type type_c) { | ||
| 1331 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1332 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1333 | const std::string op_c = VisitOperand(operation, 2).As(type_c); | ||
| 1334 | std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); | ||
| 1335 | |||
| 1336 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, | ||
| 1340 | Type type_a, Type type_b, Type type_c, Type type_d) { | ||
| 1341 | const std::string op_a = VisitOperand(operation, 0).As(type_a); | ||
| 1342 | const std::string op_b = VisitOperand(operation, 1).As(type_b); | ||
| 1343 | const std::string op_c = VisitOperand(operation, 2).As(type_c); | ||
| 1344 | const std::string op_d = VisitOperand(operation, 3).As(type_d); | ||
| 1345 | std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); | ||
| 1346 | |||
| 1347 | return ApplyPrecise(operation, std::move(op_str), result_type); | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | std::string GenerateTexture(Operation operation, const std::string& function_suffix, | ||
| 1351 | const std::vector<TextureIR>& extras, bool separate_dc = false) { | ||
| 1352 | constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; | ||
| 1353 | |||
| 1354 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 1355 | ASSERT(meta); | ||
| 1356 | |||
| 1357 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1358 | const bool has_array = meta->sampler.is_array; | ||
| 1359 | const bool has_shadow = meta->sampler.is_shadow; | ||
| 1360 | const bool workaround_lod_array_shadow_as_grad = | ||
| 1361 | !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && | ||
| 1362 | ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || | ||
| 1363 | meta->sampler.type == TextureType::TextureCube); | ||
| 1364 | |||
| 1365 | std::string expr = "texture"; | ||
| 1366 | |||
| 1367 | if (workaround_lod_array_shadow_as_grad) { | ||
| 1368 | expr += "Grad"; | ||
| 1369 | } else { | ||
| 1370 | expr += function_suffix; | ||
| 1371 | } | ||
| 1372 | |||
| 1373 | if (!meta->aoffi.empty()) { | ||
| 1374 | expr += "Offset"; | ||
| 1375 | } else if (!meta->ptp.empty()) { | ||
| 1376 | expr += "Offsets"; | ||
| 1377 | } | ||
| 1378 | if (!meta->sampler.is_indexed) { | ||
| 1379 | expr += '(' + GetSampler(meta->sampler) + ", "; | ||
| 1380 | } else { | ||
| 1381 | expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; | ||
| 1382 | } | ||
| 1383 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + | ||
| 1384 | (has_shadow && !separate_dc ? 1 : 0) - 1); | ||
| 1385 | expr += '('; | ||
| 1386 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1387 | expr += Visit(operation[i]).AsFloat(); | ||
| 1388 | |||
| 1389 | const std::size_t next = i + 1; | ||
| 1390 | if (next < count) { | ||
| 1391 | expr += ", "; | ||
| | } | ||
| 1392 | } | ||
| 1393 | if (has_array) { | ||
| 1394 | expr += ", float(" + Visit(meta->array).AsInt() + ')'; | ||
| 1395 | } | ||
| 1396 | if (has_shadow) { | ||
| 1397 | if (separate_dc) { | ||
| 1398 | expr += "), " + Visit(meta->depth_compare).AsFloat(); | ||
| 1399 | } else { | ||
| 1400 | expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; | ||
| 1401 | } | ||
| 1402 | } else { | ||
| 1403 | expr += ')'; | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | if (workaround_lod_array_shadow_as_grad) { | ||
| 1407 | switch (meta->sampler.type) { | ||
| 1408 | case TextureType::Texture2D: | ||
| 1409 | return expr + ", vec2(0.0), vec2(0.0))"; | ||
| 1410 | case TextureType::TextureCube: | ||
| 1411 | return expr + ", vec3(0.0), vec3(0.0))"; | ||
| 1412 | default: | ||
| 1413 | UNREACHABLE(); | ||
| 1414 | break; | ||
| 1415 | } | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | for (const auto& variant : extras) { | ||
| 1419 | if (const auto argument = std::get_if<TextureArgument>(&variant)) { | ||
| 1420 | expr += GenerateTextureArgument(*argument); | ||
| 1421 | } else if (std::holds_alternative<TextureOffset>(variant)) { | ||
| 1422 | if (!meta->aoffi.empty()) { | ||
| 1423 | expr += GenerateTextureAoffi(meta->aoffi); | ||
| 1424 | } else if (!meta->ptp.empty()) { | ||
| 1425 | expr += GenerateTexturePtp(meta->ptp); | ||
| 1426 | } | ||
| 1427 | } else if (std::holds_alternative<TextureDerivates>(variant)) { | ||
| 1428 | expr += GenerateTextureDerivates(meta->derivates); | ||
| 1429 | } else { | ||
| 1430 | UNREACHABLE(); | ||
| 1431 | } | ||
| 1432 | } | ||
| 1433 | |||
| 1434 | return expr + ')'; | ||
| 1435 | } | ||
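| | // Illustrative only (sampler and operand names are hypothetical): a biased 2D array sample | ||
| | // builds GLSL such as | ||
| | //     texture(sampler_0, vec3(coord_x, coord_y, float(layer)), bias) | ||
| | // and, when the shadow-LOD workaround kicks in for a sampler2DArrayShadow, roughly | ||
| | //     textureGrad(sampler_0, vec4(coord_x, coord_y, float(layer), depth), vec2(0.0), vec2(0.0)) | ||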
| 1436 | |||
| 1437 | std::string GenerateTextureArgument(const TextureArgument& argument) { | ||
| 1438 | const auto& [type, operand] = argument; | ||
| 1439 | if (operand == nullptr) { | ||
| 1440 | return {}; | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | std::string expr = ", "; | ||
| 1444 | switch (type) { | ||
| 1445 | case Type::Int: | ||
| 1446 | if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { | ||
| 1447 | // Inline the value as an immediate integer in GLSL (some extra arguments are | ||
| 1448 | // required to be constant) | ||
| 1449 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 1450 | } else { | ||
| 1451 | expr += Visit(operand).AsInt(); | ||
| 1452 | } | ||
| 1453 | break; | ||
| 1454 | case Type::Float: | ||
| 1455 | expr += Visit(operand).AsFloat(); | ||
| 1456 | break; | ||
| 1457 | default: { | ||
| 1458 | const auto type_int = static_cast<u32>(type); | ||
| 1459 | UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | ||
| 1460 | expr += '0'; | ||
| 1461 | break; | ||
| 1462 | } | ||
| 1463 | } | ||
| 1464 | return expr; | ||
| 1465 | } | ||
| 1466 | |||
| 1467 | std::string ReadTextureOffset(const Node& value) { | ||
| 1468 | if (const auto immediate = std::get_if<ImmediateNode>(&*value)) { | ||
| 1469 | // Inline the value as an immediate integer in GLSL (AOFFI arguments are required | ||
| 1470 | // to be constant by the standard). | ||
| 1471 | return std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 1472 | } else if (device.HasVariableAoffi()) { | ||
| 1473 | // Avoid using variable AOFFI on unsupported devices. | ||
| 1474 | return Visit(value).AsInt(); | ||
| 1475 | } else { | ||
| 1476 | // Insert 0 on devices not supporting variable AOFFI. | ||
| 1477 | return "0"; | ||
| 1478 | } | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { | ||
| 1482 | if (aoffi.empty()) { | ||
| 1483 | return {}; | ||
| 1484 | } | ||
| 1485 | constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; | ||
| 1486 | std::string expr = ", "; | ||
| 1487 | expr += coord_constructors.at(aoffi.size() - 1); | ||
| 1488 | expr += '('; | ||
| 1489 | |||
| 1490 | for (std::size_t index = 0; index < aoffi.size(); ++index) { | ||
| 1491 | expr += ReadTextureOffset(aoffi.at(index)); | ||
| 1492 | if (index + 1 < aoffi.size()) { | ||
| 1493 | expr += ", "; | ||
| 1494 | } | ||
| 1495 | } | ||
| 1496 | expr += ')'; | ||
| 1497 | |||
| 1498 | return expr; | ||
| 1499 | } | ||
| 1500 | |||
| 1501 | std::string GenerateTexturePtp(const std::vector<Node>& ptp) { | ||
| 1502 | static constexpr std::size_t num_vectors = 4; | ||
| 1503 | ASSERT(ptp.size() == num_vectors * 2); | ||
| 1504 | |||
| 1505 | std::string expr = ", ivec2[]("; | ||
| 1506 | for (std::size_t vector = 0; vector < num_vectors; ++vector) { | ||
| 1507 | const bool has_next = vector + 1 < num_vectors; | ||
| 1508 | expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), | ||
| 1509 | ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : ""); | ||
| 1510 | } | ||
| 1511 | expr += ')'; | ||
| 1512 | return expr; | ||
| 1513 | } | ||
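| | // Illustrative only: four immediate offset pairs produce an argument such as | ||
| | //     , ivec2[](ivec2(0, 0), ivec2(1, 0), ivec2(0, 1), ivec2(1, 1)) | ||
| | // as consumed by textureGatherOffsets. | ||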
| 1514 | |||
| 1515 | std::string GenerateTextureDerivates(const std::vector<Node>& derivates) { | ||
| 1516 | if (derivates.empty()) { | ||
| 1517 | return {}; | ||
| 1518 | } | ||
| 1519 | constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; | ||
| 1520 | std::string expr = ", "; | ||
| 1521 | const std::size_t components = derivates.size() / 2; | ||
| 1522 | std::string dx = coord_constructors.at(components - 1); | ||
| 1523 | std::string dy = coord_constructors.at(components - 1); | ||
| 1524 | dx += '('; | ||
| 1525 | dy += '('; | ||
| 1526 | |||
| 1527 | for (std::size_t index = 0; index < components; ++index) { | ||
| 1528 | const auto& operand_x{derivates.at(index * 2)}; | ||
| 1529 | const auto& operand_y{derivates.at(index * 2 + 1)}; | ||
| 1530 | dx += Visit(operand_x).AsFloat(); | ||
| 1531 | dy += Visit(operand_y).AsFloat(); | ||
| 1532 | |||
| 1533 | if (index + 1 < components) { | ||
| 1534 | dx += ", "; | ||
| 1535 | dy += ", "; | ||
| 1536 | } | ||
| 1537 | } | ||
| 1538 | dx += ')'; | ||
| 1539 | dy += ')'; | ||
| 1540 | expr += dx + ", " + dy; | ||
| 1541 | |||
| 1542 | return expr; | ||
| 1543 | } | ||
| 1544 | |||
| 1545 | std::string BuildIntegerCoordinates(Operation operation) { | ||
| 1546 | constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1547 | const std::size_t coords_count{operation.GetOperandsCount()}; | ||
| 1548 | std::string expr = constructors.at(coords_count - 1); | ||
| 1549 | for (std::size_t i = 0; i < coords_count; ++i) { | ||
| 1550 | expr += VisitOperand(operation, i).AsInt(); | ||
| 1551 | if (i + 1 < coords_count) { | ||
| 1552 | expr += ", "; | ||
| 1553 | } | ||
| 1554 | } | ||
| 1555 | expr += ')'; | ||
| 1556 | return expr; | ||
| 1557 | } | ||
| 1558 | |||
| 1559 | std::string BuildImageValues(Operation operation) { | ||
| 1560 | constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; | ||
| 1561 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1562 | |||
| 1563 | const std::size_t values_count{meta.values.size()}; | ||
| 1564 | std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); | ||
| 1565 | for (std::size_t i = 0; i < values_count; ++i) { | ||
| 1566 | expr += Visit(meta.values.at(i)).AsUint(); | ||
| 1567 | if (i + 1 < values_count) { | ||
| 1568 | expr += ", "; | ||
| 1569 | } | ||
| 1570 | } | ||
| 1571 | expr += ')'; | ||
| 1572 | return expr; | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | Expression Assign(Operation operation) { | ||
| 1576 | const Node& dest = operation[0]; | ||
| 1577 | const Node& src = operation[1]; | ||
| 1578 | |||
| 1579 | Expression target; | ||
| 1580 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1581 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1582 | // Writing to Register::ZeroIndex is a no-op, but we still have to visit the source | ||
| 1583 | // as it might have side effects. | ||
| 1584 | code.AddLine("{};", Visit(src).GetCode()); | ||
| 1585 | return {}; | ||
| 1586 | } | ||
| 1587 | target = {GetRegister(gpr->GetIndex()), Type::Float}; | ||
| 1588 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1589 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); | ||
| 1590 | auto output = GetOutputAttribute(abuf); | ||
| 1591 | if (!output) { | ||
| 1592 | return {}; | ||
| 1593 | } | ||
| 1594 | target = std::move(*output); | ||
| 1595 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1596 | target = { | ||
| 1597 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), | ||
| 1598 | Type::Uint}; | ||
| 1599 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1600 | ASSERT(stage == ShaderType::Compute); | ||
| 1601 | target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; | ||
| 1602 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1603 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); | ||
| 1604 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); | ||
| 1605 | const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); | ||
| 1606 | target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), | ||
| 1607 | Type::Uint}; | ||
| 1608 | } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { | ||
| 1609 | target = {GetCustomVariable(cv->GetIndex()), Type::Float}; | ||
| 1610 | } else { | ||
| 1611 | UNREACHABLE_MSG("Assign called without a proper target"); | ||
| 1612 | } | ||
| 1613 | |||
| 1614 | code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); | ||
| 1615 | return {}; | ||
| 1616 | } | ||
| 1617 | |||
| 1618 | template <Type type> | ||
| 1619 | Expression Add(Operation operation) { | ||
| 1620 | return GenerateBinaryInfix(operation, "+", type, type, type); | ||
| 1621 | } | ||
| 1622 | |||
| 1623 | template <Type type> | ||
| 1624 | Expression Mul(Operation operation) { | ||
| 1625 | return GenerateBinaryInfix(operation, "*", type, type, type); | ||
| 1626 | } | ||
| 1627 | |||
| 1628 | template <Type type> | ||
| 1629 | Expression Div(Operation operation) { | ||
| 1630 | return GenerateBinaryInfix(operation, "/", type, type, type); | ||
| 1631 | } | ||
| 1632 | |||
| 1633 | template <Type type> | ||
| 1634 | Expression Fma(Operation operation) { | ||
| 1635 | return GenerateTernary(operation, "fma", type, type, type, type); | ||
| 1636 | } | ||
| 1637 | |||
| 1638 | template <Type type> | ||
| 1639 | Expression Negate(Operation operation) { | ||
| 1640 | return GenerateUnary(operation, "-", type, type); | ||
| 1641 | } | ||
| 1642 | |||
| 1643 | template <Type type> | ||
| 1644 | Expression Absolute(Operation operation) { | ||
| 1645 | return GenerateUnary(operation, "abs", type, type); | ||
| 1646 | } | ||
| 1647 | |||
| 1648 | Expression FClamp(Operation operation) { | ||
| 1649 | return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, | ||
| 1650 | Type::Float); | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | Expression FCastHalf0(Operation operation) { | ||
| 1654 | return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; | ||
| 1655 | } | ||
| 1656 | |||
| 1657 | Expression FCastHalf1(Operation operation) { | ||
| 1658 | return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; | ||
| 1659 | } | ||
| 1660 | |||
| 1661 | template <Type type> | ||
| 1662 | Expression Min(Operation operation) { | ||
| 1663 | return GenerateBinaryCall(operation, "min", type, type, type); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | template <Type type> | ||
| 1667 | Expression Max(Operation operation) { | ||
| 1668 | return GenerateBinaryCall(operation, "max", type, type, type); | ||
| 1669 | } | ||
| 1670 | |||
| 1671 | Expression Select(Operation operation) { | ||
| 1672 | const std::string condition = Visit(operation[0]).AsBool(); | ||
| 1673 | const std::string true_case = Visit(operation[1]).AsUint(); | ||
| 1674 | const std::string false_case = Visit(operation[2]).AsUint(); | ||
| 1675 | std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); | ||
| 1676 | |||
| 1677 | return ApplyPrecise(operation, std::move(op_str), Type::Uint); | ||
| 1678 | } | ||
| 1679 | |||
| 1680 | Expression FCos(Operation operation) { | ||
| 1681 | return GenerateUnary(operation, "cos", Type::Float, Type::Float); | ||
| 1682 | } | ||
| 1683 | |||
| 1684 | Expression FSin(Operation operation) { | ||
| 1685 | return GenerateUnary(operation, "sin", Type::Float, Type::Float); | ||
| 1686 | } | ||
| 1687 | |||
| 1688 | Expression FExp2(Operation operation) { | ||
| 1689 | return GenerateUnary(operation, "exp2", Type::Float, Type::Float); | ||
| 1690 | } | ||
| 1691 | |||
| 1692 | Expression FLog2(Operation operation) { | ||
| 1693 | return GenerateUnary(operation, "log2", Type::Float, Type::Float); | ||
| 1694 | } | ||
| 1695 | |||
| 1696 | Expression FInverseSqrt(Operation operation) { | ||
| 1697 | return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); | ||
| 1698 | } | ||
| 1699 | |||
| 1700 | Expression FSqrt(Operation operation) { | ||
| 1701 | return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); | ||
| 1702 | } | ||
| 1703 | |||
| 1704 | Expression FRoundEven(Operation operation) { | ||
| 1705 | return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); | ||
| 1706 | } | ||
| 1707 | |||
| 1708 | Expression FFloor(Operation operation) { | ||
| 1709 | return GenerateUnary(operation, "floor", Type::Float, Type::Float); | ||
| 1710 | } | ||
| 1711 | |||
| 1712 | Expression FCeil(Operation operation) { | ||
| 1713 | return GenerateUnary(operation, "ceil", Type::Float, Type::Float); | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | Expression FTrunc(Operation operation) { | ||
| 1717 | return GenerateUnary(operation, "trunc", Type::Float, Type::Float); | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | template <Type type> | ||
| 1721 | Expression FCastInteger(Operation operation) { | ||
| 1722 | return GenerateUnary(operation, "float", Type::Float, type); | ||
| 1723 | } | ||
| 1724 | |||
| 1725 | Expression FSwizzleAdd(Operation operation) { | ||
| 1726 | const std::string op_a = VisitOperand(operation, 0).AsFloat(); | ||
| 1727 | const std::string op_b = VisitOperand(operation, 1).AsFloat(); | ||
| 1728 | |||
| 1729 | if (!device.HasShaderBallot()) { | ||
| 1730 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); | ||
| 1731 | return {fmt::format("{} + {}", op_a, op_b), Type::Float}; | ||
| 1732 | } | ||
| 1733 | |||
| 1734 | const std::string instr_mask = VisitOperand(operation, 2).AsUint(); | ||
| 1735 | const std::string mask = code.GenerateTemporary(); | ||
| 1736 | code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, | ||
| 1737 | instr_mask); | ||
| 1738 | |||
| 1739 | const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); | ||
| 1740 | const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); | ||
| 1741 | return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), | ||
| 1742 | Type::Float}; | ||
| 1743 | } | ||
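| | // Illustrative only: with ballot support, the emitted GLSL first computes the | ||
| | // per-invocation swizzle, e.g. | ||
| | //     uint tmp1 = (instr_mask >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3; | ||
| | // and the result is the expression ((op_a * fswzadd_modifiers_a[tmp1]) + (op_b * fswzadd_modifiers_b[tmp1])). | ||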
| 1744 | |||
| 1745 | Expression ICastFloat(Operation operation) { | ||
| 1746 | return GenerateUnary(operation, "int", Type::Int, Type::Float); | ||
| 1747 | } | ||
| 1748 | |||
| 1749 | Expression ICastUnsigned(Operation operation) { | ||
| 1750 | return GenerateUnary(operation, "int", Type::Int, Type::Uint); | ||
| 1751 | } | ||
| 1752 | |||
| 1753 | template <Type type> | ||
| 1754 | Expression LogicalShiftLeft(Operation operation) { | ||
| 1755 | return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); | ||
| 1756 | } | ||
| 1757 | |||
| 1758 | Expression ILogicalShiftRight(Operation operation) { | ||
| 1759 | const std::string op_a = VisitOperand(operation, 0).AsUint(); | ||
| 1760 | const std::string op_b = VisitOperand(operation, 1).AsUint(); | ||
| 1761 | std::string op_str = fmt::format("int({} >> {})", op_a, op_b); | ||
| 1762 | |||
| 1763 | return ApplyPrecise(operation, std::move(op_str), Type::Int); | ||
| 1764 | } | ||
| 1765 | |||
| 1766 | Expression IArithmeticShiftRight(Operation operation) { | ||
| 1767 | return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); | ||
| 1768 | } | ||
| 1769 | |||
| 1770 | template <Type type> | ||
| 1771 | Expression BitwiseAnd(Operation operation) { | ||
| 1772 | return GenerateBinaryInfix(operation, "&", type, type, type); | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | template <Type type> | ||
| 1776 | Expression BitwiseOr(Operation operation) { | ||
| 1777 | return GenerateBinaryInfix(operation, "|", type, type, type); | ||
| 1778 | } | ||
| 1779 | |||
| 1780 | template <Type type> | ||
| 1781 | Expression BitwiseXor(Operation operation) { | ||
| 1782 | return GenerateBinaryInfix(operation, "^", type, type, type); | ||
| 1783 | } | ||
| 1784 | |||
| 1785 | template <Type type> | ||
| 1786 | Expression BitwiseNot(Operation operation) { | ||
| 1787 | return GenerateUnary(operation, "~", type, type); | ||
| 1788 | } | ||
| 1789 | |||
| 1790 | Expression UCastFloat(Operation operation) { | ||
| 1791 | return GenerateUnary(operation, "uint", Type::Uint, Type::Float); | ||
| 1792 | } | ||
| 1793 | |||
| 1794 | Expression UCastSigned(Operation operation) { | ||
| 1795 | return GenerateUnary(operation, "uint", Type::Uint, Type::Int); | ||
| 1796 | } | ||
| 1797 | |||
| 1798 | Expression UShiftRight(Operation operation) { | ||
| 1799 | return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | template <Type type> | ||
| 1803 | Expression BitfieldInsert(Operation operation) { | ||
| 1804 | return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, | ||
| 1805 | Type::Int); | ||
| 1806 | } | ||
| 1807 | |||
| 1808 | template <Type type> | ||
| 1809 | Expression BitfieldExtract(Operation operation) { | ||
| 1810 | return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); | ||
| 1811 | } | ||
| 1812 | |||
| 1813 | template <Type type> | ||
| 1814 | Expression BitCount(Operation operation) { | ||
| 1815 | return GenerateUnary(operation, "bitCount", type, type); | ||
| 1816 | } | ||
| 1817 | |||
| 1818 | template <Type type> | ||
| 1819 | Expression BitMSB(Operation operation) { | ||
| 1820 | return GenerateUnary(operation, "findMSB", type, type); | ||
| 1821 | } | ||
| 1822 | |||
| 1823 | Expression HNegate(Operation operation) { | ||
| 1824 | const auto GetNegate = [&](std::size_t index) { | ||
| 1825 | return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; | ||
| 1826 | }; | ||
| 1827 | return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), | ||
| 1828 | GetNegate(1), GetNegate(2)), | ||
| 1829 | Type::HalfFloat}; | ||
| 1830 | } | ||
| 1831 | |||
| 1832 | Expression HClamp(Operation operation) { | ||
| 1833 | const std::string value = VisitOperand(operation, 0).AsHalfFloat(); | ||
| 1834 | const std::string min = VisitOperand(operation, 1).AsFloat(); | ||
| 1835 | const std::string max = VisitOperand(operation, 2).AsFloat(); | ||
| 1836 | std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); | ||
| 1837 | |||
| 1838 | return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); | ||
| 1839 | } | ||
| 1840 | |||
| 1841 | Expression HCastFloat(Operation operation) { | ||
| 1842 | return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), | ||
| 1843 | Type::HalfFloat}; | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | Expression HUnpack(Operation operation) { | ||
| 1847 | Expression operand = VisitOperand(operation, 0); | ||
| 1848 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1849 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1850 | return operand; | ||
| 1851 | case Tegra::Shader::HalfType::F32: | ||
| 1852 | return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; | ||
| 1853 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1854 | return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; | ||
| 1855 | case Tegra::Shader::HalfType::H1_H1: | ||
| 1856 | return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; | ||
| 1857 | } | ||
| 1858 | UNREACHABLE(); | ||
| 1859 | return {"0", Type::Int}; | ||
| 1860 | } | ||
| 1861 | |||
| 1862 | Expression HMergeF32(Operation operation) { | ||
| 1863 | return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | Expression HMergeH0(Operation operation) { | ||
| 1867 | const std::string dest = VisitOperand(operation, 0).AsUint(); | ||
| 1868 | const std::string src = VisitOperand(operation, 1).AsUint(); | ||
| 1869 | return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), | ||
| 1870 | Type::HalfFloat}; | ||
| 1871 | } | ||
| 1872 | |||
| 1873 | Expression HMergeH1(Operation operation) { | ||
| 1874 | const std::string dest = VisitOperand(operation, 0).AsUint(); | ||
| 1875 | const std::string src = VisitOperand(operation, 1).AsUint(); | ||
| 1876 | return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), | ||
| 1877 | Type::HalfFloat}; | ||
| 1878 | } | ||
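| | // Worked example (hedged): if dest packs the halves (a, b) and src packs (c, d), then | ||
| | // HMergeH0 yields vec2(c, b) and HMergeH1 yields vec2(a, d); each merge replaces exactly | ||
| | // one half of dest with the corresponding half of src. | ||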
| 1879 | |||
| 1880 | Expression HPack2(Operation operation) { | ||
| 1881 | return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), | ||
| 1882 | VisitOperand(operation, 1).AsFloat()), | ||
| 1883 | Type::HalfFloat}; | ||
| 1884 | } | ||
| 1885 | |||
| 1886 | template <const std::string_view& op, Type type, bool unordered = false> | ||
| 1887 | Expression Comparison(Operation operation) { | ||
| 1888 | static_assert(!unordered || type == Type::Float); | ||
| 1889 | |||
| 1890 | Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); | ||
| 1891 | |||
| 1892 | if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { | ||
| 1893 | // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's | ||
| 1894 | // and Nvidia's proprietary stacks. Manually force an ordered comparison. | ||
| 1895 | return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), | ||
| 1896 | VisitOperand(operation, 0).AsFloat(), | ||
| 1897 | VisitOperand(operation, 1).AsFloat()), | ||
| 1898 | Type::Bool}; | ||
| 1899 | } | ||
| 1900 | if constexpr (!unordered) { | ||
| 1901 | return expr; | ||
| 1902 | } | ||
| 1903 | // Unordered comparisons are always true for NaN operands. | ||
| 1904 | return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), | ||
| 1905 | VisitOperand(operation, 0).AsFloat(), | ||
| 1906 | VisitOperand(operation, 1).AsFloat()), | ||
| 1907 | Type::Bool}; | ||
| 1908 | } | ||
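| | // Illustrative only: an ordered != comparison is therefore emitted as | ||
| | //     ((a != b) && !isnan(a) && !isnan(b)) | ||
| | // while the unordered variants become ((a op b) || isnan(a) || isnan(b)). | ||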
| 1909 | |||
| 1910 | Expression FOrdered(Operation operation) { | ||
| 1911 | return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), | ||
| 1912 | VisitOperand(operation, 1).AsFloat()), | ||
| 1913 | Type::Bool}; | ||
| 1914 | } | ||
| 1915 | |||
| 1916 | Expression FUnordered(Operation operation) { | ||
| 1917 | return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), | ||
| 1918 | VisitOperand(operation, 1).AsFloat()), | ||
| 1919 | Type::Bool}; | ||
| 1920 | } | ||
| 1921 | |||
| 1922 | Expression LogicalAddCarry(Operation operation) { | ||
| 1923 | const std::string carry = code.GenerateTemporary(); | ||
| 1924 | code.AddLine("uint {};", carry); | ||
| 1925 | code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), | ||
| 1926 | VisitOperand(operation, 1).AsUint(), carry); | ||
| 1927 | return {fmt::format("({} != 0)", carry), Type::Bool}; | ||
| 1928 | } | ||
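| | // Illustrative only: this lowers to GLSL along the lines of | ||
| | //     uint tmp1; | ||
| | //     uaddCarry(op_a, op_b, tmp1); | ||
| | // with the boolean result read back as (tmp1 != 0). | ||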
| 1929 | |||
| 1930 | Expression LogicalAssign(Operation operation) { | ||
| 1931 | const Node& dest = operation[0]; | ||
| 1932 | const Node& src = operation[1]; | ||
| 1933 | |||
| 1934 | std::string target; | ||
| 1935 | |||
| 1936 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1937 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1938 | |||
| 1939 | const auto index = pred->GetIndex(); | ||
| 1940 | switch (index) { | ||
| 1941 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1942 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1943 | // Writing to these predicates is a no-op | ||
| 1944 | return {}; | ||
| 1945 | } | ||
| 1946 | target = GetPredicate(index); | ||
| 1947 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1948 | target = GetInternalFlag(flag->GetFlag()); | ||
| 1949 | } | ||
| 1950 | |||
| 1951 | code.AddLine("{} = {};", target, Visit(src).AsBool()); | ||
| 1952 | return {}; | ||
| 1953 | } | ||
| 1954 | |||
| 1955 | Expression LogicalAnd(Operation operation) { | ||
| 1956 | return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); | ||
| 1957 | } | ||
| 1958 | |||
| 1959 | Expression LogicalOr(Operation operation) { | ||
| 1960 | return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); | ||
| 1961 | } | ||
| 1962 | |||
| 1963 | Expression LogicalXor(Operation operation) { | ||
| 1964 | return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); | ||
| 1965 | } | ||
| 1966 | |||
| 1967 | Expression LogicalNegate(Operation operation) { | ||
| 1968 | return GenerateUnary(operation, "!", Type::Bool, Type::Bool); | ||
| 1969 | } | ||
| 1970 | |||
| 1971 | Expression LogicalPick2(Operation operation) { | ||
| 1972 | return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), | ||
| 1973 | VisitOperand(operation, 1).AsUint()), | ||
| 1974 | Type::Bool}; | ||
| 1975 | } | ||
| 1976 | |||
| 1977 | Expression LogicalAnd2(Operation operation) { | ||
| 1978 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | template <bool with_nan> | ||
| 1982 | Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { | ||
| 1983 | Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, | ||
| 1984 | Type::HalfFloat, Type::HalfFloat); | ||
| 1985 | if constexpr (!with_nan) { | ||
| 1986 | return comparison; | ||
| 1987 | } | ||
| 1988 | return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), | ||
| 1989 | VisitOperand(operation, 0).AsHalfFloat(), | ||
| 1990 | VisitOperand(operation, 1).AsHalfFloat()), | ||
| 1991 | Type::Bool2}; | ||
| 1992 | } | ||
| 1993 | |||
| 1994 | template <bool with_nan> | ||
| 1995 | Expression Logical2HLessThan(Operation operation) { | ||
| 1996 | return GenerateHalfComparison<with_nan>(operation, "lessThan"); | ||
| 1997 | } | ||
| 1998 | |||
| 1999 | template <bool with_nan> | ||
| 2000 | Expression Logical2HEqual(Operation operation) { | ||
| 2001 | return GenerateHalfComparison<with_nan>(operation, "equal"); | ||
| 2002 | } | ||
| 2003 | |||
| 2004 | template <bool with_nan> | ||
| 2005 | Expression Logical2HLessEqual(Operation operation) { | ||
| 2006 | return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | template <bool with_nan> | ||
| 2010 | Expression Logical2HGreaterThan(Operation operation) { | ||
| 2011 | return GenerateHalfComparison<with_nan>(operation, "greaterThan"); | ||
| 2012 | } | ||
| 2013 | |||
| 2014 | template <bool with_nan> | ||
| 2015 | Expression Logical2HNotEqual(Operation operation) { | ||
| 2016 | return GenerateHalfComparison<with_nan>(operation, "notEqual"); | ||
| 2017 | } | ||
| 2018 | |||
| 2019 | template <bool with_nan> | ||
| 2020 | Expression Logical2HGreaterEqual(Operation operation) { | ||
| 2021 | return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); | ||
| 2022 | } | ||
| 2023 | |||
| 2024 | Expression Texture(Operation operation) { | ||
| 2025 | const auto meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 2026 | const bool separate_dc = meta.sampler.type == TextureType::TextureCube && | ||
| 2027 | meta.sampler.is_array && meta.sampler.is_shadow; | ||
| 2028 | // TODO: Replace this with an array and make GenerateTexture use C++20 std::span | ||
| 2029 | const std::vector<TextureIR> extras{ | ||
| 2030 | TextureOffset{}, | ||
| 2031 | TextureArgument{Type::Float, meta.bias}, | ||
| 2032 | }; | ||
| 2033 | std::string expr = GenerateTexture(operation, "", extras, separate_dc); | ||
| 2034 | if (meta.sampler.is_shadow) { | ||
| 2035 | expr = fmt::format("vec4({})", expr); | ||
| 2036 | } | ||
| 2037 | return {expr + GetSwizzle(meta.element), Type::Float}; | ||
| 2038 | } | ||
| 2039 | |||
| 2040 | Expression TextureLod(Operation operation) { | ||
| 2041 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 2042 | ASSERT(meta); | ||
| 2043 | |||
| 2044 | std::string expr{}; | ||
| 2045 | |||
| 2046 | if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && | ||
| 2047 | ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || | ||
| 2048 | meta->sampler.type == TextureType::TextureCube)) { | ||
| 2049 | LOG_ERROR(Render_OpenGL, | ||
| 2050 | "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); | ||
| 2051 | expr = GenerateTexture(operation, "Lod", {}); | ||
| 2052 | } else { | ||
| 2053 | expr = GenerateTexture(operation, "Lod", | ||
| 2054 | {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); | ||
| 2055 | } | ||
| 2056 | |||
| 2057 | if (meta->sampler.is_shadow) { | ||
| 2058 | expr = "vec4(" + expr + ')'; | ||
| 2059 | } | ||
| 2060 | return {expr + GetSwizzle(meta->element), Type::Float}; | ||
| 2061 | } | ||
| 2062 | |||
| 2063 | Expression TextureGather(Operation operation) { | ||
| 2064 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 2065 | |||
| 2066 | const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int; | ||
| 2067 | const bool separate_dc = meta.sampler.is_shadow; | ||
| 2068 | |||
| 2069 | std::vector<TextureIR> ir_; | ||
| 2070 | if (meta.sampler.is_shadow) { | ||
| 2071 | ir_ = {TextureOffset{}}; | ||
| 2072 | } else { | ||
| 2073 | ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; | ||
| 2074 | } | ||
| 2075 | return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), | ||
| 2076 | Type::Float}; | ||
| 2077 | } | ||
| 2078 | |||
| 2079 | Expression TextureQueryDimensions(Operation operation) { | ||
| 2080 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 2081 | ASSERT(meta); | ||
| 2082 | |||
| 2083 | const std::string sampler = GetSampler(meta->sampler); | ||
| 2084 | const std::string lod = VisitOperand(operation, 0).AsInt(); | ||
| 2085 | |||
| 2086 | switch (meta->element) { | ||
| 2087 | case 0: | ||
| 2088 | case 1: | ||
| 2089 | return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), | ||
| 2090 | Type::Int}; | ||
| 2091 | case 3: | ||
| 2092 | return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; | ||
| 2093 | } | ||
| 2094 | UNREACHABLE(); | ||
| 2095 | return {"0", Type::Int}; | ||
| 2096 | } | ||
| 2097 | |||
| 2098 | Expression TextureQueryLod(Operation operation) { | ||
| 2099 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 2100 | ASSERT(meta); | ||
| 2101 | |||
| 2102 | if (meta->element < 2) { | ||
| 2103 | return {fmt::format("int(({} * vec2(256)){})", | ||
| 2104 | GenerateTexture(operation, "QueryLod", {}), | ||
| 2105 | GetSwizzle(meta->element)), | ||
| 2106 | Type::Int}; | ||
| 2107 | } | ||
| 2108 | return {"0", Type::Int}; | ||
| 2109 | } | ||
| 2110 | |||
| 2111 | Expression TexelFetch(Operation operation) { | ||
| 2112 | constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; | ||
| 2113 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 2114 | ASSERT(meta); | ||
| 2115 | UNIMPLEMENTED_IF(meta->sampler.is_array); | ||
| 2116 | const std::size_t count = operation.GetOperandsCount(); | ||
| 2117 | |||
| 2118 | std::string expr = "texelFetch("; | ||
| 2119 | expr += GetSampler(meta->sampler); | ||
| 2120 | expr += ", "; | ||
| 2121 | |||
| 2122 | expr += constructors.at(count + (meta->array ? 1 : 0) - 1); | ||
| 2123 | expr += '('; | ||
| 2124 | for (std::size_t i = 0; i < count; ++i) { | ||
| 2125 | if (i > 0) { | ||
| 2126 | expr += ", "; | ||
| 2127 | } | ||
| 2128 | expr += VisitOperand(operation, i).AsInt(); | ||
| 2129 | } | ||
| 2130 | if (meta->array) { | ||
| 2131 | expr += ", "; | ||
| 2132 | expr += Visit(meta->array).AsInt(); | ||
| 2133 | } | ||
| 2134 | expr += ')'; | ||
| 2135 | |||
| 2136 | if (meta->lod && !meta->sampler.is_buffer) { | ||
| 2137 | expr += ", "; | ||
| 2138 | expr += Visit(meta->lod).AsInt(); | ||
| 2139 | } | ||
| 2140 | expr += ')'; | ||
| 2141 | expr += GetSwizzle(meta->element); | ||
| 2142 | |||
| 2143 | return {std::move(expr), Type::Float}; | ||
| 2144 | } | ||
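| | // Illustrative sketch of an emitted fetch (hypothetical operands): a 2D | ||
| | // lookup with an explicit LOD becomes roughly | ||
| | //     texelFetch(sampler0, ivec2(x, y), lod).x | ||
| | // Note that the LOD is appended after the coordinate constructor closes, as | ||
| | // its own texelFetch argument, and is omitted for buffer samplers. | ||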
| 2145 | |||
| 2146 | Expression TextureGradient(Operation operation) { | ||
| 2147 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 2148 | std::string expr = | ||
| 2149 | GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); | ||
| 2150 | return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; | ||
| 2151 | } | ||
| 2152 | |||
| 2153 | Expression ImageLoad(Operation operation) { | ||
| 2154 | if (!device.HasImageLoadFormatted()) { | ||
| 2155 | LOG_ERROR(Render_OpenGL, | ||
| 2156 | "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); | ||
| 2157 | return {"0", Type::Int}; | ||
| 2158 | } | ||
| 2159 | |||
| 2160 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 2161 | return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), | ||
| 2162 | BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), | ||
| 2163 | Type::Uint}; | ||
| 2164 | } | ||
| 2165 | |||
| 2166 | Expression ImageStore(Operation operation) { | ||
| 2167 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 2168 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), | ||
| 2169 | BuildIntegerCoordinates(operation), BuildImageValues(operation)); | ||
| 2170 | return {}; | ||
| 2171 | } | ||
| 2172 | |||
| 2173 | template <const std::string_view& opname> | ||
| 2174 | Expression AtomicImage(Operation operation) { | ||
| 2175 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 2176 | ASSERT(meta.values.size() == 1); | ||
| 2177 | |||
| 2178 | return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), | ||
| 2179 | BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), | ||
| 2180 | Type::Uint}; | ||
| 2181 | } | ||
| 2182 | |||
| 2183 | template <const std::string_view& opname, Type type> | ||
| 2184 | Expression Atomic(Operation operation) { | ||
| 2185 | if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { | ||
| 2186 | UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); | ||
| 2187 | return {}; | ||
| 2188 | } | ||
| 2189 | return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), | ||
| 2190 | Visit(operation[1]).AsUint()), | ||
| 2191 | Type::Uint}; | ||
| 2192 | } | ||
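| | // Illustrative sketch (hypothetical operands): Atomic<Func::Add, Type::Uint> | ||
| | // emits a call such as | ||
| | //     atomicAdd(smem[0], value) | ||
| | // The first operand must visit to an lvalue, since the GLSL atomic* built-ins | ||
| | // take the target memory location by reference (inout). | ||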
| 2193 | |||
| 2194 | template <const std::string_view& opname, Type type> | ||
| 2195 | Expression Reduce(Operation operation) { | ||
| 2196 | code.AddLine("{};", Atomic<opname, type>(operation).GetCode()); | ||
| 2197 | return {}; | ||
| 2198 | } | ||
| 2199 | |||
| 2200 | Expression Branch(Operation operation) { | ||
| 2201 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | ||
| 2202 | UNIMPLEMENTED_IF(!target); | ||
| 2203 | |||
| 2204 | code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); | ||
| 2205 | code.AddLine("break;"); | ||
| 2206 | return {}; | ||
| 2207 | } | ||
| 2208 | |||
| 2209 | Expression BranchIndirect(Operation operation) { | ||
| 2210 | const std::string op_a = VisitOperand(operation, 0).AsUint(); | ||
| 2211 | |||
| 2212 | code.AddLine("jmp_to = {};", op_a); | ||
| 2213 | code.AddLine("break;"); | ||
| 2214 | return {}; | ||
| 2215 | } | ||
| 2216 | |||
| 2217 | Expression PushFlowStack(Operation operation) { | ||
| 2218 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2219 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | ||
| 2220 | UNIMPLEMENTED_IF(!target); | ||
| 2221 | |||
| 2222 | code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), | ||
| 2223 | target->GetValue()); | ||
| 2224 | return {}; | ||
| 2225 | } | ||
| 2226 | |||
| 2227 | Expression PopFlowStack(Operation operation) { | ||
| 2228 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2229 | code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); | ||
| 2230 | code.AddLine("break;"); | ||
| 2231 | return {}; | ||
| 2232 | } | ||
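| | // Illustrative sketch of the emitted flow-stack code inside the dispatch | ||
| | // loop (the stack names come from FlowStackName/FlowStackTopName and are | ||
| | // sketched here; the target address is hypothetical): | ||
| | //     ssy_flow_stack[ssy_flow_stack_top++] = 0xA0U;  // PushFlowStack | ||
| | //     jmp_to = ssy_flow_stack[--ssy_flow_stack_top]; // PopFlowStack | ||
| | //     break;                                         // re-enters the switch | ||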
| 2233 | |||
| 2234 | void PreExit() { | ||
| 2235 | if (stage != ShaderType::Fragment) { | ||
| 2236 | return; | ||
| 2237 | } | ||
| 2238 | const auto& used_registers = ir.GetRegisters(); | ||
| 2239 | const auto SafeGetRegister = [&](u32 reg) -> Expression { | ||
| 2240 | // TODO(Rodrigo): Replace with contains once C++20 releases | ||
| 2241 | if (used_registers.find(reg) != used_registers.end()) { | ||
| 2242 | return {GetRegister(reg), Type::Float}; | ||
| 2243 | } | ||
| 2244 | return {"0.0f", Type::Float}; | ||
| 2245 | }; | ||
| 2246 | |||
| 2247 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); | ||
| 2248 | |||
| 2249 | // Write the color outputs using the data in the shader registers; disabled | ||
| 2250 | // render targets/components are skipped in the register assignment. | ||
| 2251 | u32 current_reg = 0; | ||
| 2252 | for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { | ||
| 2253 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | ||
| 2254 | for (u32 component = 0; component < 4; ++component) { | ||
| 2255 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||
| 2256 | code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), | ||
| 2257 | SafeGetRegister(current_reg).AsFloat()); | ||
| 2258 | ++current_reg; | ||
| 2259 | } | ||
| 2260 | } | ||
| 2261 | } | ||
| 2262 | if (header.ps.omap.depth) { | ||
| 2263 | // The depth output is always 2 registers after the last color output, and current_reg | ||
| 2264 | // already contains one past the last color register. | ||
| 2265 | code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); | ||
| 2266 | } | ||
| 2267 | } | ||
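| | // Illustrative epilogue (hypothetical registers) for one RGBA render target | ||
| | // plus depth: gpr0..gpr3 feed frag_color0 and, with current_reg == 4, the | ||
| | // depth write lands two registers past the last color register: | ||
| | //     frag_color0.r = gpr0; ... frag_color0.a = gpr3; | ||
| | //     gl_FragDepth = gpr5; | ||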
| 2268 | |||
| 2269 | Expression Exit(Operation operation) { | ||
| 2270 | PreExit(); | ||
| 2271 | code.AddLine("return;"); | ||
| 2272 | return {}; | ||
| 2273 | } | ||
| 2274 | |||
| 2275 | Expression Discard(Operation operation) { | ||
| 2276 | // Enclose "discard" in a conditional, so that GLSL compilation does not complain | ||
| 2277 | // about unexecuted instructions that may follow this. | ||
| 2278 | code.AddLine("if (true) {{"); | ||
| 2279 | ++code.scope; | ||
| 2280 | code.AddLine("discard;"); | ||
| 2281 | --code.scope; | ||
| 2282 | code.AddLine("}}"); | ||
| 2283 | return {}; | ||
| 2284 | } | ||
| 2285 | |||
| 2286 | Expression EmitVertex(Operation operation) { | ||
| 2287 | ASSERT_MSG(stage == ShaderType::Geometry, | ||
| 2288 | "EmitVertex is expected to be used in a geometry shader."); | ||
| 2289 | code.AddLine("EmitVertex();"); | ||
| 2290 | return {}; | ||
| 2291 | } | ||
| 2292 | |||
| 2293 | Expression EndPrimitive(Operation operation) { | ||
| 2294 | ASSERT_MSG(stage == ShaderType::Geometry, | ||
| 2295 | "EndPrimitive is expected to be used in a geometry shader."); | ||
| 2296 | code.AddLine("EndPrimitive();"); | ||
| 2297 | return {}; | ||
| 2298 | } | ||
| 2299 | |||
| 2300 | Expression InvocationId(Operation operation) { | ||
| 2301 | return {"gl_InvocationID", Type::Int}; | ||
| 2302 | } | ||
| 2303 | |||
| 2304 | Expression YNegate(Operation operation) { | ||
| 2305 | // Y_NEGATE is mapped to this uniform value | ||
| 2306 | return {"gl_FrontMaterial.ambient.a", Type::Float}; | ||
| 2307 | } | ||
| 2308 | |||
| 2309 | template <u32 element> | ||
| 2310 | Expression LocalInvocationId(Operation) { | ||
| 2311 | return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; | ||
| 2312 | } | ||
| 2313 | |||
| 2314 | template <u32 element> | ||
| 2315 | Expression WorkGroupId(Operation) { | ||
| 2316 | return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; | ||
| 2317 | } | ||
| 2318 | |||
| 2319 | Expression BallotThread(Operation operation) { | ||
| 2320 | const std::string value = VisitOperand(operation, 0).AsBool(); | ||
| 2321 | if (!device.HasWarpIntrinsics()) { | ||
| 2322 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); | ||
| 2323 | // Stub on non-Nvidia devices by simulating all threads voting the same as the active | ||
| 2324 | // one. | ||
| 2325 | return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; | ||
| 2326 | } | ||
| 2327 | return {fmt::format("ballotThreadNV({})", value), Type::Uint}; | ||
| 2328 | } | ||
| 2329 | |||
| 2330 | Expression Vote(Operation operation, const char* func) { | ||
| 2331 | const std::string value = VisitOperand(operation, 0).AsBool(); | ||
| 2332 | if (!device.HasWarpIntrinsics()) { | ||
| 2333 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); | ||
| 2334 | // Stub with a warp size of one. | ||
| 2335 | return {value, Type::Bool}; | ||
| 2336 | } | ||
| 2337 | return {fmt::format("{}({})", func, value), Type::Bool}; | ||
| 2338 | } | ||
| 2339 | |||
| 2340 | Expression VoteAll(Operation operation) { | ||
| 2341 | return Vote(operation, "allThreadsNV"); | ||
| 2342 | } | ||
| 2343 | |||
| 2344 | Expression VoteAny(Operation operation) { | ||
| 2345 | return Vote(operation, "anyThreadNV"); | ||
| 2346 | } | ||
| 2347 | |||
| 2348 | Expression VoteEqual(Operation operation) { | ||
| 2349 | if (!device.HasWarpIntrinsics()) { | ||
| 2350 | LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); | ||
| 2351 | // We must return true here: these intrinsics are stubbed for a theoretical | ||
| 2352 | // warp size of 1, and a single thread always votes equally with itself. | ||
| 2353 | return {"true", Type::Bool}; | ||
| 2354 | } | ||
| 2355 | return Vote(operation, "allThreadsEqualNV"); | ||
| 2356 | } | ||
| 2357 | |||
| 2358 | Expression ThreadId(Operation operation) { | ||
| 2359 | if (!device.HasShaderBallot()) { | ||
| 2360 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); | ||
| 2361 | return {"0U", Type::Uint}; | ||
| 2362 | } | ||
| 2363 | return {"gl_SubGroupInvocationARB", Type::Uint}; | ||
| 2364 | } | ||
| 2365 | |||
| 2366 | template <const std::string_view& comparison> | ||
| 2367 | Expression ThreadMask(Operation) { | ||
| 2368 | if (device.HasWarpIntrinsics()) { | ||
| 2369 | return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; | ||
| 2370 | } | ||
| 2371 | if (device.HasShaderBallot()) { | ||
| 2372 | return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; | ||
| 2373 | } | ||
| 2374 | LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); | ||
| 2375 | return {"0U", Type::Uint}; | ||
| 2376 | } | ||
| 2377 | |||
| 2378 | Expression ShuffleIndexed(Operation operation) { | ||
| 2379 | std::string value = VisitOperand(operation, 0).AsFloat(); | ||
| 2380 | |||
| 2381 | if (!device.HasShaderBallot()) { | ||
| 2382 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); | ||
| 2383 | return {std::move(value), Type::Float}; | ||
| 2384 | } | ||
| 2385 | |||
| 2386 | const std::string index = VisitOperand(operation, 1).AsUint(); | ||
| 2387 | return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; | ||
| 2388 | } | ||
| 2389 | |||
| 2390 | Expression Barrier(Operation) { | ||
| 2391 | if (!ir.IsDecompiled()) { | ||
| 2392 | LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); | ||
| 2393 | return {}; | ||
| 2394 | } | ||
| 2395 | code.AddLine("barrier();"); | ||
| 2396 | return {}; | ||
| 2397 | } | ||
| 2398 | |||
| 2399 | Expression MemoryBarrierGroup(Operation) { | ||
| 2400 | code.AddLine("groupMemoryBarrier();"); | ||
| 2401 | return {}; | ||
| 2402 | } | ||
| 2403 | |||
| 2404 | Expression MemoryBarrierGlobal(Operation) { | ||
| 2405 | code.AddLine("memoryBarrier();"); | ||
| 2406 | return {}; | ||
| 2407 | } | ||
| 2408 | |||
| 2409 | struct Func final { | ||
| 2410 | Func() = delete; | ||
| 2411 | ~Func() = delete; | ||
| 2412 | |||
| 2413 | static constexpr std::string_view LessThan = "<"; | ||
| 2414 | static constexpr std::string_view Equal = "=="; | ||
| 2415 | static constexpr std::string_view LessEqual = "<="; | ||
| 2416 | static constexpr std::string_view GreaterThan = ">"; | ||
| 2417 | static constexpr std::string_view NotEqual = "!="; | ||
| 2418 | static constexpr std::string_view GreaterEqual = ">="; | ||
| 2419 | |||
| 2420 | static constexpr std::string_view Eq = "Eq"; | ||
| 2421 | static constexpr std::string_view Ge = "Ge"; | ||
| 2422 | static constexpr std::string_view Gt = "Gt"; | ||
| 2423 | static constexpr std::string_view Le = "Le"; | ||
| 2424 | static constexpr std::string_view Lt = "Lt"; | ||
| 2425 | |||
| 2426 | static constexpr std::string_view Add = "Add"; | ||
| 2427 | static constexpr std::string_view Min = "Min"; | ||
| 2428 | static constexpr std::string_view Max = "Max"; | ||
| 2429 | static constexpr std::string_view And = "And"; | ||
| 2430 | static constexpr std::string_view Or = "Or"; | ||
| 2431 | static constexpr std::string_view Xor = "Xor"; | ||
| 2432 | static constexpr std::string_view Exchange = "Exchange"; | ||
| 2433 | }; | ||
| 2434 | |||
| 2435 | static constexpr std::array operation_decompilers = { | ||
| 2436 | &GLSLDecompiler::Assign, | ||
| 2437 | |||
| 2438 | &GLSLDecompiler::Select, | ||
| 2439 | |||
| 2440 | &GLSLDecompiler::Add<Type::Float>, | ||
| 2441 | &GLSLDecompiler::Mul<Type::Float>, | ||
| 2442 | &GLSLDecompiler::Div<Type::Float>, | ||
| 2443 | &GLSLDecompiler::Fma<Type::Float>, | ||
| 2444 | &GLSLDecompiler::Negate<Type::Float>, | ||
| 2445 | &GLSLDecompiler::Absolute<Type::Float>, | ||
| 2446 | &GLSLDecompiler::FClamp, | ||
| 2447 | &GLSLDecompiler::FCastHalf0, | ||
| 2448 | &GLSLDecompiler::FCastHalf1, | ||
| 2449 | &GLSLDecompiler::Min<Type::Float>, | ||
| 2450 | &GLSLDecompiler::Max<Type::Float>, | ||
| 2451 | &GLSLDecompiler::FCos, | ||
| 2452 | &GLSLDecompiler::FSin, | ||
| 2453 | &GLSLDecompiler::FExp2, | ||
| 2454 | &GLSLDecompiler::FLog2, | ||
| 2455 | &GLSLDecompiler::FInverseSqrt, | ||
| 2456 | &GLSLDecompiler::FSqrt, | ||
| 2457 | &GLSLDecompiler::FRoundEven, | ||
| 2458 | &GLSLDecompiler::FFloor, | ||
| 2459 | &GLSLDecompiler::FCeil, | ||
| 2460 | &GLSLDecompiler::FTrunc, | ||
| 2461 | &GLSLDecompiler::FCastInteger<Type::Int>, | ||
| 2462 | &GLSLDecompiler::FCastInteger<Type::Uint>, | ||
| 2463 | &GLSLDecompiler::FSwizzleAdd, | ||
| 2464 | |||
| 2465 | &GLSLDecompiler::Add<Type::Int>, | ||
| 2466 | &GLSLDecompiler::Mul<Type::Int>, | ||
| 2467 | &GLSLDecompiler::Div<Type::Int>, | ||
| 2468 | &GLSLDecompiler::Negate<Type::Int>, | ||
| 2469 | &GLSLDecompiler::Absolute<Type::Int>, | ||
| 2470 | &GLSLDecompiler::Min<Type::Int>, | ||
| 2471 | &GLSLDecompiler::Max<Type::Int>, | ||
| 2472 | |||
| 2473 | &GLSLDecompiler::ICastFloat, | ||
| 2474 | &GLSLDecompiler::ICastUnsigned, | ||
| 2475 | &GLSLDecompiler::LogicalShiftLeft<Type::Int>, | ||
| 2476 | &GLSLDecompiler::ILogicalShiftRight, | ||
| 2477 | &GLSLDecompiler::IArithmeticShiftRight, | ||
| 2478 | &GLSLDecompiler::BitwiseAnd<Type::Int>, | ||
| 2479 | &GLSLDecompiler::BitwiseOr<Type::Int>, | ||
| 2480 | &GLSLDecompiler::BitwiseXor<Type::Int>, | ||
| 2481 | &GLSLDecompiler::BitwiseNot<Type::Int>, | ||
| 2482 | &GLSLDecompiler::BitfieldInsert<Type::Int>, | ||
| 2483 | &GLSLDecompiler::BitfieldExtract<Type::Int>, | ||
| 2484 | &GLSLDecompiler::BitCount<Type::Int>, | ||
| 2485 | &GLSLDecompiler::BitMSB<Type::Int>, | ||
| 2486 | |||
| 2487 | &GLSLDecompiler::Add<Type::Uint>, | ||
| 2488 | &GLSLDecompiler::Mul<Type::Uint>, | ||
| 2489 | &GLSLDecompiler::Div<Type::Uint>, | ||
| 2490 | &GLSLDecompiler::Min<Type::Uint>, | ||
| 2491 | &GLSLDecompiler::Max<Type::Uint>, | ||
| 2492 | &GLSLDecompiler::UCastFloat, | ||
| 2493 | &GLSLDecompiler::UCastSigned, | ||
| 2494 | &GLSLDecompiler::LogicalShiftLeft<Type::Uint>, | ||
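| | // Both right-shift operation codes map to UShiftRight: logical and | ||
| | // arithmetic right shifts are identical for unsigned operands. | ||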
| 2495 | &GLSLDecompiler::UShiftRight, | ||
| 2496 | &GLSLDecompiler::UShiftRight, | ||
| 2497 | &GLSLDecompiler::BitwiseAnd<Type::Uint>, | ||
| 2498 | &GLSLDecompiler::BitwiseOr<Type::Uint>, | ||
| 2499 | &GLSLDecompiler::BitwiseXor<Type::Uint>, | ||
| 2500 | &GLSLDecompiler::BitwiseNot<Type::Uint>, | ||
| 2501 | &GLSLDecompiler::BitfieldInsert<Type::Uint>, | ||
| 2502 | &GLSLDecompiler::BitfieldExtract<Type::Uint>, | ||
| 2503 | &GLSLDecompiler::BitCount<Type::Uint>, | ||
| 2504 | &GLSLDecompiler::BitMSB<Type::Uint>, | ||
| 2505 | |||
| 2506 | &GLSLDecompiler::Add<Type::HalfFloat>, | ||
| 2507 | &GLSLDecompiler::Mul<Type::HalfFloat>, | ||
| 2508 | &GLSLDecompiler::Fma<Type::HalfFloat>, | ||
| 2509 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | ||
| 2510 | &GLSLDecompiler::HNegate, | ||
| 2511 | &GLSLDecompiler::HClamp, | ||
| 2512 | &GLSLDecompiler::HCastFloat, | ||
| 2513 | &GLSLDecompiler::HUnpack, | ||
| 2514 | &GLSLDecompiler::HMergeF32, | ||
| 2515 | &GLSLDecompiler::HMergeH0, | ||
| 2516 | &GLSLDecompiler::HMergeH1, | ||
| 2517 | &GLSLDecompiler::HPack2, | ||
| 2518 | |||
| 2519 | &GLSLDecompiler::LogicalAssign, | ||
| 2520 | &GLSLDecompiler::LogicalAnd, | ||
| 2521 | &GLSLDecompiler::LogicalOr, | ||
| 2522 | &GLSLDecompiler::LogicalXor, | ||
| 2523 | &GLSLDecompiler::LogicalNegate, | ||
| 2524 | &GLSLDecompiler::LogicalPick2, | ||
| 2525 | &GLSLDecompiler::LogicalAnd2, | ||
| 2526 | |||
| 2527 | &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>, | ||
| 2528 | &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>, | ||
| 2529 | &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>, | ||
| 2530 | &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>, | ||
| 2531 | &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>, | ||
| 2532 | &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>, | ||
| 2533 | &GLSLDecompiler::FOrdered, | ||
| 2534 | &GLSLDecompiler::FUnordered, | ||
| 2535 | &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>, | ||
| 2536 | &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>, | ||
| 2537 | &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>, | ||
| 2538 | &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>, | ||
| 2539 | &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>, | ||
| 2540 | &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>, | ||
| 2541 | |||
| 2542 | &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>, | ||
| 2543 | &GLSLDecompiler::Comparison<Func::Equal, Type::Int>, | ||
| 2544 | &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>, | ||
| 2545 | &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>, | ||
| 2546 | &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>, | ||
| 2547 | &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>, | ||
| 2548 | |||
| 2549 | &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>, | ||
| 2550 | &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>, | ||
| 2551 | &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>, | ||
| 2552 | &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>, | ||
| 2553 | &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>, | ||
| 2554 | &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>, | ||
| 2555 | |||
| 2556 | &GLSLDecompiler::LogicalAddCarry, | ||
| 2557 | |||
| 2558 | &GLSLDecompiler::Logical2HLessThan<false>, | ||
| 2559 | &GLSLDecompiler::Logical2HEqual<false>, | ||
| 2560 | &GLSLDecompiler::Logical2HLessEqual<false>, | ||
| 2561 | &GLSLDecompiler::Logical2HGreaterThan<false>, | ||
| 2562 | &GLSLDecompiler::Logical2HNotEqual<false>, | ||
| 2563 | &GLSLDecompiler::Logical2HGreaterEqual<false>, | ||
| 2564 | &GLSLDecompiler::Logical2HLessThan<true>, | ||
| 2565 | &GLSLDecompiler::Logical2HEqual<true>, | ||
| 2566 | &GLSLDecompiler::Logical2HLessEqual<true>, | ||
| 2567 | &GLSLDecompiler::Logical2HGreaterThan<true>, | ||
| 2568 | &GLSLDecompiler::Logical2HNotEqual<true>, | ||
| 2569 | &GLSLDecompiler::Logical2HGreaterEqual<true>, | ||
| 2570 | |||
| 2571 | &GLSLDecompiler::Texture, | ||
| 2572 | &GLSLDecompiler::TextureLod, | ||
| 2573 | &GLSLDecompiler::TextureGather, | ||
| 2574 | &GLSLDecompiler::TextureQueryDimensions, | ||
| 2575 | &GLSLDecompiler::TextureQueryLod, | ||
| 2576 | &GLSLDecompiler::TexelFetch, | ||
| 2577 | &GLSLDecompiler::TextureGradient, | ||
| 2578 | |||
| 2579 | &GLSLDecompiler::ImageLoad, | ||
| 2580 | &GLSLDecompiler::ImageStore, | ||
| 2581 | |||
| 2582 | &GLSLDecompiler::AtomicImage<Func::Add>, | ||
| 2583 | &GLSLDecompiler::AtomicImage<Func::And>, | ||
| 2584 | &GLSLDecompiler::AtomicImage<Func::Or>, | ||
| 2585 | &GLSLDecompiler::AtomicImage<Func::Xor>, | ||
| 2586 | &GLSLDecompiler::AtomicImage<Func::Exchange>, | ||
| 2587 | |||
| 2588 | &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>, | ||
| 2589 | &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, | ||
| 2590 | &GLSLDecompiler::Atomic<Func::Min, Type::Uint>, | ||
| 2591 | &GLSLDecompiler::Atomic<Func::Max, Type::Uint>, | ||
| 2592 | &GLSLDecompiler::Atomic<Func::And, Type::Uint>, | ||
| 2593 | &GLSLDecompiler::Atomic<Func::Or, Type::Uint>, | ||
| 2594 | &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>, | ||
| 2595 | |||
| 2596 | &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>, | ||
| 2597 | &GLSLDecompiler::Atomic<Func::Add, Type::Int>, | ||
| 2598 | &GLSLDecompiler::Atomic<Func::Min, Type::Int>, | ||
| 2599 | &GLSLDecompiler::Atomic<Func::Max, Type::Int>, | ||
| 2600 | &GLSLDecompiler::Atomic<Func::And, Type::Int>, | ||
| 2601 | &GLSLDecompiler::Atomic<Func::Or, Type::Int>, | ||
| 2602 | &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, | ||
| 2603 | |||
| 2604 | &GLSLDecompiler::Reduce<Func::Add, Type::Uint>, | ||
| 2605 | &GLSLDecompiler::Reduce<Func::Min, Type::Uint>, | ||
| 2606 | &GLSLDecompiler::Reduce<Func::Max, Type::Uint>, | ||
| 2607 | &GLSLDecompiler::Reduce<Func::And, Type::Uint>, | ||
| 2608 | &GLSLDecompiler::Reduce<Func::Or, Type::Uint>, | ||
| 2609 | &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>, | ||
| 2610 | |||
| 2611 | &GLSLDecompiler::Reduce<Func::Add, Type::Int>, | ||
| 2612 | &GLSLDecompiler::Reduce<Func::Min, Type::Int>, | ||
| 2613 | &GLSLDecompiler::Reduce<Func::Max, Type::Int>, | ||
| 2614 | &GLSLDecompiler::Reduce<Func::And, Type::Int>, | ||
| 2615 | &GLSLDecompiler::Reduce<Func::Or, Type::Int>, | ||
| 2616 | &GLSLDecompiler::Reduce<Func::Xor, Type::Int>, | ||
| 2617 | |||
| 2618 | &GLSLDecompiler::Branch, | ||
| 2619 | &GLSLDecompiler::BranchIndirect, | ||
| 2620 | &GLSLDecompiler::PushFlowStack, | ||
| 2621 | &GLSLDecompiler::PopFlowStack, | ||
| 2622 | &GLSLDecompiler::Exit, | ||
| 2623 | &GLSLDecompiler::Discard, | ||
| 2624 | |||
| 2625 | &GLSLDecompiler::EmitVertex, | ||
| 2626 | &GLSLDecompiler::EndPrimitive, | ||
| 2627 | |||
| 2628 | &GLSLDecompiler::InvocationId, | ||
| 2629 | &GLSLDecompiler::YNegate, | ||
| 2630 | &GLSLDecompiler::LocalInvocationId<0>, | ||
| 2631 | &GLSLDecompiler::LocalInvocationId<1>, | ||
| 2632 | &GLSLDecompiler::LocalInvocationId<2>, | ||
| 2633 | &GLSLDecompiler::WorkGroupId<0>, | ||
| 2634 | &GLSLDecompiler::WorkGroupId<1>, | ||
| 2635 | &GLSLDecompiler::WorkGroupId<2>, | ||
| 2636 | |||
| 2637 | &GLSLDecompiler::BallotThread, | ||
| 2638 | &GLSLDecompiler::VoteAll, | ||
| 2639 | &GLSLDecompiler::VoteAny, | ||
| 2640 | &GLSLDecompiler::VoteEqual, | ||
| 2641 | |||
| 2642 | &GLSLDecompiler::ThreadId, | ||
| 2643 | &GLSLDecompiler::ThreadMask<Func::Eq>, | ||
| 2644 | &GLSLDecompiler::ThreadMask<Func::Ge>, | ||
| 2645 | &GLSLDecompiler::ThreadMask<Func::Gt>, | ||
| 2646 | &GLSLDecompiler::ThreadMask<Func::Le>, | ||
| 2647 | &GLSLDecompiler::ThreadMask<Func::Lt>, | ||
| 2648 | &GLSLDecompiler::ShuffleIndexed, | ||
| 2649 | |||
| 2650 | &GLSLDecompiler::Barrier, | ||
| 2651 | &GLSLDecompiler::MemoryBarrierGroup, | ||
| 2652 | &GLSLDecompiler::MemoryBarrierGlobal, | ||
| 2653 | }; | ||
| 2654 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 2655 | |||
| 2656 | std::string GetRegister(u32 index) const { | ||
| 2657 | return AppendSuffix(index, "gpr"); | ||
| 2658 | } | ||
| 2659 | |||
| 2660 | std::string GetCustomVariable(u32 index) const { | ||
| 2661 | return AppendSuffix(index, "custom_var"); | ||
| 2662 | } | ||
| 2663 | |||
| 2664 | std::string GetPredicate(Tegra::Shader::Pred pred) const { | ||
| 2665 | return AppendSuffix(static_cast<u32>(pred), "pred"); | ||
| 2666 | } | ||
| 2667 | |||
| 2668 | std::string GetGenericInputAttribute(Attribute::Index attribute) const { | ||
| 2669 | return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); | ||
| 2670 | } | ||
| 2671 | |||
| 2672 | std::unordered_map<u8, GenericVaryingDescription> varying_description; | ||
| 2673 | |||
| 2674 | std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { | ||
| 2675 | const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); | ||
| 2676 | const auto& description = varying_description.at(offset); | ||
| 2677 | if (description.is_scalar) { | ||
| 2678 | return description.name; | ||
| 2679 | } | ||
| 2680 | return fmt::format("{}[{}]", description.name, element - description.first_element); | ||
| 2681 | } | ||
| 2682 | |||
| 2683 | std::string GetConstBuffer(u32 index) const { | ||
| 2684 | return AppendSuffix(index, "cbuf"); | ||
| 2685 | } | ||
| 2686 | |||
| 2687 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { | ||
| 2688 | return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); | ||
| 2689 | } | ||
| 2690 | |||
| 2691 | std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { | ||
| 2692 | return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, | ||
| 2693 | suffix); | ||
| 2694 | } | ||
| 2695 | |||
| 2696 | std::string GetConstBufferBlock(u32 index) const { | ||
| 2697 | return AppendSuffix(index, "cbuf_block"); | ||
| 2698 | } | ||
| 2699 | |||
| 2700 | std::string GetLocalMemory() const { | ||
| 2701 | if (suffix.empty()) { | ||
| 2702 | return "lmem"; | ||
| 2703 | } else { | ||
| 2704 | return "lmem_" + std::string{suffix}; | ||
| 2705 | } | ||
| 2706 | } | ||
| 2707 | |||
| 2708 | std::string GetInternalFlag(InternalFlag flag) const { | ||
| 2709 | constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", | ||
| 2710 | "overflow_flag"}; | ||
| 2711 | const auto index = static_cast<u32>(flag); | ||
| 2712 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | ||
| 2713 | |||
| 2714 | if (suffix.empty()) { | ||
| 2715 | return InternalFlagNames[index]; | ||
| 2716 | } else { | ||
| 2717 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | ||
| 2718 | } | ||
| 2719 | } | ||
| 2720 | |||
| 2721 | std::string GetSampler(const SamplerEntry& sampler) const { | ||
| 2722 | return AppendSuffix(sampler.index, "sampler"); | ||
| 2723 | } | ||
| 2724 | |||
| 2725 | std::string GetImage(const ImageEntry& image) const { | ||
| 2726 | return AppendSuffix(image.index, "image"); | ||
| 2727 | } | ||
| 2728 | |||
| 2729 | std::string AppendSuffix(u32 index, std::string_view name) const { | ||
| 2730 | if (suffix.empty()) { | ||
| 2731 | return fmt::format("{}{}", name, index); | ||
| 2732 | } else { | ||
| 2733 | return fmt::format("{}{}_{}", name, index, suffix); | ||
| 2734 | } | ||
| 2735 | } | ||
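| | // Example: AppendSuffix(3, "gpr") yields "gpr3" when no suffix is set, or | ||
| | // "gpr3_foo" for a (hypothetical) suffix "foo"; the suffix keeps names unique | ||
| | // when several programs are combined into one GLSL translation unit. | ||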
| 2736 | |||
| 2737 | u32 GetNumPhysicalInputAttributes() const { | ||
| 2738 | return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | ||
| 2739 | } | ||
| 2740 | |||
| 2741 | u32 GetNumPhysicalAttributes() const { | ||
| 2742 | return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); | ||
| 2743 | } | ||
| 2744 | |||
| 2745 | u32 GetNumPhysicalVaryings() const { | ||
| 2746 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); | ||
| 2747 | } | ||
| 2748 | |||
| 2749 | const Device& device; | ||
| 2750 | const ShaderIR& ir; | ||
| 2751 | const Registry& registry; | ||
| 2752 | const ShaderType stage; | ||
| 2753 | const std::string_view identifier; | ||
| 2754 | const std::string_view suffix; | ||
| 2755 | const Header header; | ||
| 2756 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2757 | |||
| 2758 | ShaderWriter code; | ||
| 2759 | |||
| 2760 | std::optional<u32> max_input_vertices; | ||
| 2761 | }; | ||
| 2762 | |||
| 2763 | std::string GetFlowVariable(u32 index) { | ||
| 2764 | return fmt::format("flow_var{}", index); | ||
| 2765 | } | ||
| 2766 | |||
| 2767 | class ExprDecompiler { | ||
| 2768 | public: | ||
| 2769 | explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} | ||
| 2770 | |||
| 2771 | void operator()(const ExprAnd& expr) { | ||
| 2772 | inner += '('; | ||
| 2773 | std::visit(*this, *expr.operand1); | ||
| 2774 | inner += " && "; | ||
| 2775 | std::visit(*this, *expr.operand2); | ||
| 2776 | inner += ')'; | ||
| 2777 | } | ||
| 2778 | |||
| 2779 | void operator()(const ExprOr& expr) { | ||
| 2780 | inner += '('; | ||
| 2781 | std::visit(*this, *expr.operand1); | ||
| 2782 | inner += " || "; | ||
| 2783 | std::visit(*this, *expr.operand2); | ||
| 2784 | inner += ')'; | ||
| 2785 | } | ||
| 2786 | |||
| 2787 | void operator()(const ExprNot& expr) { | ||
| 2788 | inner += '!'; | ||
| 2789 | std::visit(*this, *expr.operand1); | ||
| 2790 | } | ||
| 2791 | |||
| 2792 | void operator()(const ExprPredicate& expr) { | ||
| 2793 | const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); | ||
| 2794 | inner += decomp.GetPredicate(pred); | ||
| 2795 | } | ||
| 2796 | |||
| 2797 | void operator()(const ExprCondCode& expr) { | ||
| 2798 | inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); | ||
| 2799 | } | ||
| 2800 | |||
| 2801 | void operator()(const ExprVar& expr) { | ||
| 2802 | inner += GetFlowVariable(expr.var_index); | ||
| 2803 | } | ||
| 2804 | |||
| 2805 | void operator()(const ExprBoolean& expr) { | ||
| 2806 | inner += expr.value ? "true" : "false"; | ||
| 2807 | } | ||
| 2808 | |||
| 2809 | void operator()(const VideoCommon::Shader::ExprGprEqual& expr) { | ||
| 2810 | inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); | ||
| 2811 | } | ||
| 2812 | |||
| 2813 | const std::string& GetResult() const { | ||
| 2814 | return inner; | ||
| 2815 | } | ||
| 2816 | |||
| 2817 | private: | ||
| 2818 | GLSLDecompiler& decomp; | ||
| 2819 | std::string inner; | ||
| 2820 | }; | ||
| 2821 | |||
| 2822 | class ASTDecompiler { | ||
| 2823 | public: | ||
| 2824 | explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} | ||
| 2825 | |||
| 2826 | void operator()(const ASTProgram& ast) { | ||
| 2827 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2828 | while (current) { | ||
| 2829 | Visit(current); | ||
| 2830 | current = current->GetNext(); | ||
| 2831 | } | ||
| 2832 | } | ||
| 2833 | |||
| 2834 | void operator()(const ASTIfThen& ast) { | ||
| 2835 | ExprDecompiler expr_parser{decomp}; | ||
| 2836 | std::visit(expr_parser, *ast.condition); | ||
| 2837 | decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); | ||
| 2838 | decomp.code.scope++; | ||
| 2839 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2840 | while (current) { | ||
| 2841 | Visit(current); | ||
| 2842 | current = current->GetNext(); | ||
| 2843 | } | ||
| 2844 | decomp.code.scope--; | ||
| 2845 | decomp.code.AddLine("}}"); | ||
| 2846 | } | ||
| 2847 | |||
| 2848 | void operator()(const ASTIfElse& ast) { | ||
| 2849 | decomp.code.AddLine("else {{"); | ||
| 2850 | decomp.code.scope++; | ||
| 2851 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2852 | while (current) { | ||
| 2853 | Visit(current); | ||
| 2854 | current = current->GetNext(); | ||
| 2855 | } | ||
| 2856 | decomp.code.scope--; | ||
| 2857 | decomp.code.AddLine("}}"); | ||
| 2858 | } | ||
| 2859 | |||
| 2860 | void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { | ||
| 2861 | UNREACHABLE(); | ||
| 2862 | } | ||
| 2863 | |||
| 2864 | void operator()(const ASTBlockDecoded& ast) { | ||
| 2865 | decomp.VisitBlock(ast.nodes); | ||
| 2866 | } | ||
| 2867 | |||
| 2868 | void operator()(const ASTVarSet& ast) { | ||
| 2869 | ExprDecompiler expr_parser{decomp}; | ||
| 2870 | std::visit(expr_parser, *ast.condition); | ||
| 2871 | decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); | ||
| 2872 | } | ||
| 2873 | |||
| 2874 | void operator()(const ASTLabel& ast) { | ||
| 2875 | decomp.code.AddLine("// Label_{}:", ast.index); | ||
| 2876 | } | ||
| 2877 | |||
| 2878 | void operator()([[maybe_unused]] const ASTGoto& ast) { | ||
| 2879 | UNREACHABLE(); | ||
| 2880 | } | ||
| 2881 | |||
| 2882 | void operator()(const ASTDoWhile& ast) { | ||
| 2883 | ExprDecompiler expr_parser{decomp}; | ||
| 2884 | std::visit(expr_parser, *ast.condition); | ||
| 2885 | decomp.code.AddLine("do {{"); | ||
| 2886 | decomp.code.scope++; | ||
| 2887 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2888 | while (current) { | ||
| 2889 | Visit(current); | ||
| 2890 | current = current->GetNext(); | ||
| 2891 | } | ||
| 2892 | decomp.code.scope--; | ||
| 2893 | decomp.code.AddLine("}} while({});", expr_parser.GetResult()); | ||
| 2894 | } | ||
| 2895 | |||
| 2896 | void operator()(const ASTReturn& ast) { | ||
| 2897 | const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); | ||
| 2898 | if (!is_true) { | ||
| 2899 | ExprDecompiler expr_parser{decomp}; | ||
| 2900 | std::visit(expr_parser, *ast.condition); | ||
| 2901 | decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); | ||
| 2902 | decomp.code.scope++; | ||
| 2903 | } | ||
| 2904 | if (ast.kills) { | ||
| 2905 | decomp.code.AddLine("discard;"); | ||
| 2906 | } else { | ||
| 2907 | decomp.PreExit(); | ||
| 2908 | decomp.code.AddLine("return;"); | ||
| 2909 | } | ||
| 2910 | if (!is_true) { | ||
| 2911 | decomp.code.scope--; | ||
| 2912 | decomp.code.AddLine("}}"); | ||
| 2913 | } | ||
| 2914 | } | ||
| 2915 | |||
| 2916 | void operator()(const ASTBreak& ast) { | ||
| 2917 | const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); | ||
| 2918 | if (!is_true) { | ||
| 2919 | ExprDecompiler expr_parser{decomp}; | ||
| 2920 | std::visit(expr_parser, *ast.condition); | ||
| 2921 | decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); | ||
| 2922 | decomp.code.scope++; | ||
| 2923 | } | ||
| 2924 | decomp.code.AddLine("break;"); | ||
| 2925 | if (!is_true) { | ||
| 2926 | decomp.code.scope--; | ||
| 2927 | decomp.code.AddLine("}}"); | ||
| 2928 | } | ||
| 2929 | } | ||
| 2930 | |||
| 2931 | void Visit(const ASTNode& node) { | ||
| 2932 | std::visit(*this, *node->GetInnerData()); | ||
| 2933 | } | ||
| 2934 | |||
| 2935 | private: | ||
| 2936 | GLSLDecompiler& decomp; | ||
| 2937 | }; | ||
| 2938 | |||
| 2939 | void GLSLDecompiler::DecompileAST() { | ||
| 2940 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 2941 | for (u32 i = 0; i < num_flow_variables; i++) { | ||
| 2942 | code.AddLine("bool {} = false;", GetFlowVariable(i)); | ||
| 2943 | } | ||
| 2944 | |||
| 2945 | ASTDecompiler decompiler{*this}; | ||
| 2946 | decompiler.Visit(ir.GetASTProgram()); | ||
| 2947 | } | ||
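| | // Illustrative sketch of the AST path's output (hypothetical program): flow | ||
| | // variables are declared up front, then driven by ASTVarSet/ExprVar: | ||
| | //     bool flow_var0 = false; | ||
| | //     do { ... flow_var0 = (ftou(gpr0) == 1); ... } while(flow_var0); | ||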
| 2948 | |||
| 2949 | } // Anonymous namespace | ||
| 2950 | |||
| 2951 | ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { | ||
| 2952 | ShaderEntries entries; | ||
| 2953 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 2954 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | ||
| 2955 | cbuf.first); | ||
| 2956 | } | ||
| 2957 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 2958 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, | ||
| 2959 | usage.is_written); | ||
| 2960 | } | ||
| 2961 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 2962 | entries.samplers.emplace_back(sampler); | ||
| 2963 | } | ||
| 2964 | for (const auto& image : ir.GetImages()) { | ||
| 2965 | entries.images.emplace_back(image); | ||
| 2966 | } | ||
| 2967 | const auto clip_distances = ir.GetClipDistances(); | ||
| 2968 | for (std::size_t i = 0; i < std::size(clip_distances); ++i) { | ||
| 2969 | entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i; | ||
| 2970 | } | ||
| 2971 | for (const auto& buffer : entries.const_buffers) { | ||
| 2972 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 2973 | } | ||
| 2974 | entries.shader_length = ir.GetLength(); | ||
| 2975 | return entries; | ||
| 2976 | } | ||
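| | // Worked example for the bitmasks above (hypothetical shader): clip | ||
| | // distances 0 and 4 plus constant buffers 0 and 1 produce | ||
| | //     entries.clip_distances == 0b10001 | ||
| | //     entries.enabled_uniform_buffers == 0b11 | ||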
| 2977 | |||
| 2978 | std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 2979 | ShaderType stage, std::string_view identifier, | ||
| 2980 | std::string_view suffix) { | ||
| 2981 | GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); | ||
| 2982 | decompiler.Decompile(); | ||
| 2983 | return decompiler.GetResult(); | ||
| 2984 | } | ||
| 2985 | |||
| 2986 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null | |||
| @@ -1,69 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/maxwell_3d.h" | ||
| 14 | #include "video_core/engines/shader_type.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | class Device; | ||
| 21 | |||
| 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 23 | using SamplerEntry = VideoCommon::Shader::SamplerEntry; | ||
| 24 | using ImageEntry = VideoCommon::Shader::ImageEntry; | ||
| 25 | |||
| 26 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | ||
| 27 | public: | ||
| 28 | explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) | ||
| 29 | : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} | ||
| 30 | |||
| 31 | u32 GetIndex() const { | ||
| 32 | return index; | ||
| 33 | } | ||
| 34 | |||
| 35 | private: | ||
| 36 | u32 index = 0; | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct GlobalMemoryEntry { | ||
| 40 | constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, | ||
| 41 | bool is_written_) | ||
| 42 | : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ | ||
| 43 | is_written_} {} | ||
| 44 | |||
| 45 | u32 cbuf_index = 0; | ||
| 46 | u32 cbuf_offset = 0; | ||
| 47 | bool is_read = false; | ||
| 48 | bool is_written = false; | ||
| 49 | }; | ||
| 50 | |||
| 51 | struct ShaderEntries { | ||
| 52 | std::vector<ConstBufferEntry> const_buffers; | ||
| 53 | std::vector<GlobalMemoryEntry> global_memory_entries; | ||
| 54 | std::vector<SamplerEntry> samplers; | ||
| 55 | std::vector<ImageEntry> images; | ||
| 56 | std::size_t shader_length{}; | ||
| 57 | u32 clip_distances{}; | ||
| 58 | u32 enabled_uniform_buffers{}; | ||
| 59 | }; | ||
| 60 | |||
| 61 | ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 62 | Tegra::Engines::ShaderType stage); | ||
| 63 | |||
| 64 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 65 | const VideoCommon::Shader::Registry& registry, | ||
| 66 | Tegra::Engines::ShaderType stage, std::string_view identifier, | ||
| 67 | std::string_view suffix = {}); | ||
| 68 | |||
| 69 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null | |||
| @@ -1,482 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/fs/file.h" | ||
| 12 | #include "common/fs/fs.h" | ||
| 13 | #include "common/fs/path_util.h" | ||
| 14 | #include "common/logging/log.h" | ||
| 15 | #include "common/scm_rev.h" | ||
| 16 | #include "common/settings.h" | ||
| 17 | #include "common/zstd_compression.h" | ||
| 18 | #include "core/core.h" | ||
| 19 | #include "core/hle/kernel/k_process.h" | ||
| 20 | #include "video_core/engines/shader_type.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||
| 23 | |||
| 24 | namespace OpenGL { | ||
| 25 | |||
| 26 | using Tegra::Engines::ShaderType; | ||
| 27 | using VideoCommon::Shader::BindlessSamplerMap; | ||
| 28 | using VideoCommon::Shader::BoundSamplerMap; | ||
| 29 | using VideoCommon::Shader::KeyMap; | ||
| 30 | using VideoCommon::Shader::SeparateSamplerKey; | ||
| 31 | using ShaderCacheVersionHash = std::array<u8, 64>; | ||
| 32 | |||
| 33 | struct ConstBufferKey { | ||
| 34 | u32 cbuf = 0; | ||
| 35 | u32 offset = 0; | ||
| 36 | u32 value = 0; | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct BoundSamplerEntry { | ||
| 40 | u32 offset = 0; | ||
| 41 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 42 | }; | ||
| 43 | |||
| 44 | struct SeparateSamplerEntry { | ||
| 45 | u32 cbuf1 = 0; | ||
| 46 | u32 cbuf2 = 0; | ||
| 47 | u32 offset1 = 0; | ||
| 48 | u32 offset2 = 0; | ||
| 49 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 50 | }; | ||
| 51 | |||
| 52 | struct BindlessSamplerEntry { | ||
| 53 | u32 cbuf = 0; | ||
| 54 | u32 offset = 0; | ||
| 55 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 56 | }; | ||
| 57 | |||
| 58 | namespace { | ||
| 59 | |||
| 60 | constexpr u32 NativeVersion = 21; | ||
| 61 | |||
| 62 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | ||
| 63 | ShaderCacheVersionHash hash{}; | ||
| 64 | const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); | ||
| 65 | std::memcpy(hash.data(), Common::g_shader_cache_version, length); | ||
| 66 | return hash; | ||
| 67 | } | ||
| 68 | |||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; | ||
| 72 | |||
| 73 | ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; | ||
| 74 | |||
| 75 | bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { | ||
| 76 | if (!file.ReadObject(type)) { | ||
| 77 | return false; | ||
| 78 | } | ||
| 79 | u32 code_size; | ||
| 80 | u32 code_size_b; | ||
| 81 | if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { | ||
| 82 | return false; | ||
| 83 | } | ||
| 84 | code.resize(code_size); | ||
| 85 | code_b.resize(code_size_b); | ||
| 86 | if (file.Read(code) != code_size) { | ||
| 87 | return false; | ||
| 88 | } | ||
| 89 | if (HasProgramA() && file.Read(code_b) != code_size_b) { | ||
| 90 | return false; | ||
| 91 | } | ||
| 92 | |||
| 93 | u8 is_texture_handler_size_known; | ||
| 94 | u32 texture_handler_size_value; | ||
| 95 | u32 num_keys; | ||
| 96 | u32 num_bound_samplers; | ||
| 97 | u32 num_separate_samplers; | ||
| 98 | u32 num_bindless_samplers; | ||
| 99 | if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || | ||
| 100 | !file.ReadObject(is_texture_handler_size_known) || | ||
| 101 | !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || | ||
| 102 | !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || | ||
| 103 | !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || | ||
| 104 | !file.ReadObject(num_bindless_samplers)) { | ||
| 105 | return false; | ||
| 106 | } | ||
| 107 | if (is_texture_handler_size_known) { | ||
| 108 | texture_handler_size = texture_handler_size_value; | ||
| 109 | } | ||
| 110 | |||
| 111 | std::vector<ConstBufferKey> flat_keys(num_keys); | ||
| 112 | std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); | ||
| 113 | std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); | ||
| 114 | std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); | ||
| 115 | if (file.Read(flat_keys) != flat_keys.size() || | ||
| 116 | file.Read(flat_bound_samplers) != flat_bound_samplers.size() || | ||
| 117 | file.Read(flat_separate_samplers) != flat_separate_samplers.size() || | ||
| 118 | file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { | ||
| 119 | return false; | ||
| 120 | } | ||
| 121 | for (const auto& entry : flat_keys) { | ||
| 122 | keys.insert({{entry.cbuf, entry.offset}, entry.value}); | ||
| 123 | } | ||
| 124 | for (const auto& entry : flat_bound_samplers) { | ||
| 125 | bound_samplers.emplace(entry.offset, entry.sampler); | ||
| 126 | } | ||
| 127 | for (const auto& entry : flat_separate_samplers) { | ||
| 128 | SeparateSamplerKey key; | ||
| 129 | key.buffers = {entry.cbuf1, entry.cbuf2}; | ||
| 130 | key.offsets = {entry.offset1, entry.offset2}; | ||
| 131 | separate_samplers.emplace(key, entry.sampler); | ||
| 132 | } | ||
| 133 | for (const auto& entry : flat_bindless_samplers) { | ||
| 134 | bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); | ||
| 135 | } | ||
| 136 | |||
| 137 | return true; | ||
| 138 | } | ||
| 139 | |||
| 140 | bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { | ||
| 141 | if (!file.WriteObject(static_cast<u32>(type)) || | ||
| 142 | !file.WriteObject(static_cast<u32>(code.size())) || | ||
| 143 | !file.WriteObject(static_cast<u32>(code_b.size()))) { | ||
| 144 | return false; | ||
| 145 | } | ||
| 146 | if (file.Write(code) != code.size()) { | ||
| 147 | return false; | ||
| 148 | } | ||
| 149 | if (HasProgramA() && file.Write(code_b) != code_b.size()) { | ||
| 150 | return false; | ||
| 151 | } | ||
| 152 | |||
| 153 | if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || | ||
| 154 | !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) || | ||
| 155 | !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || | ||
| 156 | !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) || | ||
| 157 | !file.WriteObject(static_cast<u32>(bound_samplers.size())) || | ||
| 158 | !file.WriteObject(static_cast<u32>(separate_samplers.size())) || | ||
| 159 | !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) { | ||
| 160 | return false; | ||
| 161 | } | ||
| 162 | |||
| 163 | std::vector<ConstBufferKey> flat_keys; | ||
| 164 | flat_keys.reserve(keys.size()); | ||
| 165 | for (const auto& [address, value] : keys) { | ||
| 166 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | ||
| 167 | } | ||
| 168 | |||
| 169 | std::vector<BoundSamplerEntry> flat_bound_samplers; | ||
| 170 | flat_bound_samplers.reserve(bound_samplers.size()); | ||
| 171 | for (const auto& [address, sampler] : bound_samplers) { | ||
| 172 | flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); | ||
| 173 | } | ||
| 174 | |||
| 175 | std::vector<SeparateSamplerEntry> flat_separate_samplers; | ||
| 176 | flat_separate_samplers.reserve(separate_samplers.size()); | ||
| 177 | for (const auto& [key, sampler] : separate_samplers) { | ||
| 178 | SeparateSamplerEntry entry; | ||
| 179 | std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; | ||
| 180 | std::tie(entry.offset1, entry.offset2) = key.offsets; | ||
| 181 | entry.sampler = sampler; | ||
| 182 | flat_separate_samplers.push_back(entry); | ||
| 183 | } | ||
| 184 | |||
| 185 | std::vector<BindlessSamplerEntry> flat_bindless_samplers; | ||
| 186 | flat_bindless_samplers.reserve(bindless_samplers.size()); | ||
| 187 | for (const auto& [address, sampler] : bindless_samplers) { | ||
| 188 | flat_bindless_samplers.push_back( | ||
| 189 | BindlessSamplerEntry{address.first, address.second, sampler}); | ||
| 190 | } | ||
| 191 | |||
| 192 | return file.Write(flat_keys) == flat_keys.size() && | ||
| 193 | file.Write(flat_bound_samplers) == flat_bound_samplers.size() && | ||
| 194 | file.Write(flat_separate_samplers) == flat_separate_samplers.size() && | ||
| 195 | file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); | ||
| 196 | } | ||
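| | // On-disk layout of one transferable entry, as implied by Load()/Save() | ||
| | // (fields are raw host representations via ReadObject/WriteObject): | ||
| | //     u32 type, u32 code_size, u32 code_size_b, code[], code_b[] (program A only), | ||
| | //     unique_identifier, bound_buffer, u8 known, u32 texture_handler_size, | ||
| | //     graphics_info, compute_info, four u32 counts, four flattened arrays | ||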
| 197 | |||
| 198 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; | ||
| 199 | |||
| 200 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; | ||
| 201 | |||
| 202 | void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { | ||
| 203 | title_id = title_id_; | ||
| 204 | } | ||
| 205 | |||
| 206 | std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { | ||
| 207 | // Skip games without title id | ||
| 208 | const bool has_title_id = title_id != 0; | ||
| 209 | if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { | ||
| 210 | return std::nullopt; | ||
| 211 | } | ||
| 212 | |||
| 213 | Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, | ||
| 214 | Common::FS::FileType::BinaryFile}; | ||
| 215 | if (!file.IsOpen()) { | ||
| 216 | LOG_INFO(Render_OpenGL, "No transferable shader cache found"); | ||
| 217 | is_usable = true; | ||
| 218 | return std::nullopt; | ||
| 219 | } | ||
| 220 | |||
| 221 | u32 version{}; | ||
| 222 | if (!file.ReadObject(version)) { | ||
| 223 | LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); | ||
| 224 | return std::nullopt; | ||
| 225 | } | ||
| 226 | |||
| 227 | if (version < NativeVersion) { | ||
| 228 | LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); | ||
| 229 | file.Close(); | ||
| 230 | InvalidateTransferable(); | ||
| 231 | is_usable = true; | ||
| 232 | return std::nullopt; | ||
| 233 | } | ||
| 234 | if (version > NativeVersion) { | ||
| 235 | LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " | ||
| 236 | "of the emulator, skipping"); | ||
| 237 | return std::nullopt; | ||
| 238 | } | ||
| 239 | |||
| 240 | // Version is valid, load the shaders | ||
| 241 | std::vector<ShaderDiskCacheEntry> entries; | ||
| 242 | while (static_cast<u64>(file.Tell()) < file.GetSize()) { | ||
| 243 | ShaderDiskCacheEntry& entry = entries.emplace_back(); | ||
| 244 | if (!entry.Load(file)) { | ||
| 245 | LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); | ||
| 246 | return std::nullopt; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | is_usable = true; | ||
| 251 | return {std::move(entries)}; | ||
| 252 | } | ||
| 253 | |||
| 254 | std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { | ||
| 255 | if (!is_usable) { | ||
| 256 | return {}; | ||
| 257 | } | ||
| 258 | |||
| 259 | Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, | ||
| 260 | Common::FS::FileType::BinaryFile}; | ||
| 261 | if (!file.IsOpen()) { | ||
| 262 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); | ||
| 263 | return {}; | ||
| 264 | } | ||
| 265 | |||
| 266 | if (const auto result = LoadPrecompiledFile(file)) { | ||
| 267 | return *result; | ||
| 268 | } | ||
| 269 | |||
| 270 | LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); | ||
| 271 | file.Close(); | ||
| 272 | InvalidatePrecompiled(); | ||
| 273 | return {}; | ||
| 274 | } | ||
| 275 | |||
| 276 | std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( | ||
| 277 | Common::FS::IOFile& file) { | ||
| 278 | // Read compressed file from disk and decompress to virtual precompiled cache file | ||
| 279 | std::vector<u8> compressed(file.GetSize()); | ||
| 280 | if (file.Read(compressed) != file.GetSize()) { | ||
| 281 | return std::nullopt; | ||
| 282 | } | ||
| 283 | const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed); | ||
| 284 | SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); | ||
| 285 | precompiled_cache_virtual_file_offset = 0; | ||
| 286 | |||
| 287 | ShaderCacheVersionHash file_hash{}; | ||
| 288 | if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { | ||
| 289 | precompiled_cache_virtual_file_offset = 0; | ||
| 290 | return std::nullopt; | ||
| 291 | } | ||
| 292 | if (GetShaderCacheVersionHash() != file_hash) { | ||
| 293 | LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); | ||
| 294 | precompiled_cache_virtual_file_offset = 0; | ||
| 295 | return std::nullopt; | ||
| 296 | } | ||
| 297 | |||
| 298 | std::vector<ShaderDiskCachePrecompiled> entries; | ||
| 299 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | ||
| 300 | u32 binary_size; | ||
| 301 | auto& entry = entries.emplace_back(); | ||
| 302 | if (!LoadObjectFromPrecompiled(entry.unique_identifier) || | ||
| 303 | !LoadObjectFromPrecompiled(entry.binary_format) || | ||
| 304 | !LoadObjectFromPrecompiled(binary_size)) { | ||
| 305 | return std::nullopt; | ||
| 306 | } | ||
| 307 | |||
| 308 | entry.binary.resize(binary_size); | ||
| 309 | if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { | ||
| 310 | return std::nullopt; | ||
| 311 | } | ||
| 312 | } | ||
| 313 | return entries; | ||
| 314 | } | ||
| 315 | |||
| 316 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | ||
| 317 | if (!Common::FS::RemoveFile(GetTransferablePath())) { | ||
| 318 | LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", | ||
| 319 | Common::FS::PathToUTF8String(GetTransferablePath())); | ||
| 320 | } | ||
| 321 | InvalidatePrecompiled(); | ||
| 322 | } | ||
| 323 | |||
| 324 | void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { | ||
| 325 | // Clear virtual precompiled cache file | ||
| 326 | precompiled_cache_virtual_file.Resize(0); | ||
| 327 | |||
| 328 | if (!Common::FS::RemoveFile(GetPrecompiledPath())) { | ||
| 329 | LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", | ||
| 330 | Common::FS::PathToUTF8String(GetPrecompiledPath())); | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 334 | void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { | ||
| 335 | if (!is_usable) { | ||
| 336 | return; | ||
| 337 | } | ||
| 338 | |||
| 339 | const u64 id = entry.unique_identifier; | ||
| 340 | if (stored_transferable.contains(id)) { | ||
| 341 | // The shader already exists | ||
| 342 | return; | ||
| 343 | } | ||
| 344 | |||
| 345 | Common::FS::IOFile file = AppendTransferableFile(); | ||
| 346 | if (!file.IsOpen()) { | ||
| 347 | return; | ||
| 348 | } | ||
| 349 | if (!entry.Save(file)) { | ||
| 350 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); | ||
| 351 | file.Close(); | ||
| 352 | InvalidateTransferable(); | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | |||
| 356 | stored_transferable.insert(id); | ||
| 357 | } | ||
| 358 | |||
| 359 | void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { | ||
| 360 | if (!is_usable) { | ||
| 361 | return; | ||
| 362 | } | ||
| 363 | |||
| 364 | // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header | ||
| 365 | // when writing the dump. This should be done the moment I get access to write to the virtual | ||
| 366 | // file. | ||
| 367 | if (precompiled_cache_virtual_file.GetSize() == 0) { | ||
| 368 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | ||
| 369 | } | ||
| 370 | |||
| 371 | GLint binary_length; | ||
| 372 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | ||
| 373 | |||
| 374 | GLenum binary_format; | ||
| 375 | std::vector<u8> binary(binary_length); | ||
| 376 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | ||
| 377 | |||
| 378 | if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || | ||
| 379 | !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || | ||
| 380 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 381 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", | ||
| 382 | unique_identifier); | ||
| 383 | InvalidatePrecompiled(); | ||
| 384 | } | ||
| 385 | } | ||
| 386 | |||
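SavePrecompiled above dumps the driver's linked binary with glGetProgramBinary; the consumer side feeds it back through glProgramBinary and must check GL_LINK_STATUS, because drivers are free to reject binaries after a driver update. A minimal sketch, assuming a current GL context with function pointers already loaded:

#include <vector>

#include <glad/glad.h>

// Returns 0 if the driver rejects the cached binary; the caller then falls
// back to recompiling from the transferable cache.
GLuint LoadProgramBinary(GLenum binary_format, const std::vector<GLubyte>& binary) {
    const GLuint program = glCreateProgram();
    glProgramBinary(program, binary_format, binary.data(),
                    static_cast<GLsizei>(binary.size()));
    GLint link_status = GL_FALSE;
    glGetProgramiv(program, GL_LINK_STATUS, &link_status);
    if (link_status != GL_TRUE) {
        glDeleteProgram(program);
        return 0;
    }
    return program;
}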
| 387 | Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { | ||
| 388 | if (!EnsureDirectories()) { | ||
| 389 | return {}; | ||
| 390 | } | ||
| 391 | |||
| 392 | const auto transferable_path{GetTransferablePath()}; | ||
| 393 | const bool existed = Common::FS::Exists(transferable_path); | ||
| 394 | |||
| 395 | Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, | ||
| 396 | Common::FS::FileType::BinaryFile}; | ||
| 397 | if (!file.IsOpen()) { | ||
| 398 | LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", | ||
| 399 | Common::FS::PathToUTF8String(transferable_path)); | ||
| 400 | return {}; | ||
| 401 | } | ||
| 402 | if (!existed || file.GetSize() == 0) { | ||
| 403 | // If the file is new or was empty, write the version header | ||
| 404 | if (!file.WriteObject(NativeVersion)) { | ||
| 405 | LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", | ||
| 406 | Common::FS::PathToUTF8String(transferable_path)); | ||
| 407 | return {}; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | return file; | ||
| 411 | } | ||
| 412 | |||
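The append-or-initialize pattern above (open in append mode, stamp the version header only when the file is new or empty) generalizes beyond IOFile; a stand-alone sketch with the standard library, where kNativeVersion is a placeholder for the real NativeVersion constant defined elsewhere in the codebase:

#include <cstdint>
#include <filesystem>
#include <fstream>

constexpr std::uint32_t kNativeVersion = 1; // hypothetical stand-in

std::ofstream OpenTransferable(const std::filesystem::path& path) {
    const bool fresh =
        !std::filesystem::exists(path) || std::filesystem::file_size(path) == 0;
    std::ofstream file(path, std::ios::binary | std::ios::app);
    if (file && fresh) {
        // A new or empty file gets the version header before any entries.
        file.write(reinterpret_cast<const char*>(&kNativeVersion), sizeof(kNativeVersion));
    }
    return file;
}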
| 413 | void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { | ||
| 414 | const auto hash{GetShaderCacheVersionHash()}; | ||
| 415 | if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { | ||
| 416 | LOG_ERROR( | ||
| 417 | Render_OpenGL, | ||
| 418 | "Failed to write precompiled cache version hash to virtual precompiled cache file"); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | |||
| 422 | void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | ||
| 423 | precompiled_cache_virtual_file_offset = 0; | ||
| 424 | const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); | ||
| 425 | const std::vector<u8> compressed = | ||
| 426 | Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); | ||
| 427 | |||
| 428 | const auto precompiled_path = GetPrecompiledPath(); | ||
| 429 | Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, | ||
| 430 | Common::FS::FileType::BinaryFile}; | ||
| 431 | |||
| 432 | if (!file.IsOpen()) { | ||
| 433 | LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", | ||
| 434 | Common::FS::PathToUTF8String(precompiled_path)); | ||
| 435 | return; | ||
| 436 | } | ||
| 437 | if (file.Write(compressed) != compressed.size()) { | ||
| 438 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", | ||
| 439 | Common::FS::PathToUTF8String(precompiled_path)); | ||
| 440 | } | ||
| 441 | } | ||
| 442 | |||
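The write path above compresses the whole virtual file through Common::Compression::CompressDataZSTDDefault, which presumably wraps libzstd (an assumption; the helper's body is outside this diff). In terms of the raw zstd API the step amounts to roughly:

#include <cstdint>
#include <vector>

#include <zstd.h>

std::vector<std::uint8_t> CompressZstd(const std::uint8_t* src, std::size_t size) {
    std::vector<std::uint8_t> dst(ZSTD_compressBound(size)); // worst-case output size
    const std::size_t written =
        ZSTD_compress(dst.data(), dst.size(), src, size, ZSTD_CLEVEL_DEFAULT);
    if (ZSTD_isError(written)) {
        return {};
    }
    dst.resize(written);
    return dst;
}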
| 443 | bool ShaderDiskCacheOpenGL::EnsureDirectories() const { | ||
| 444 | const auto CreateDir = [](const std::filesystem::path& dir) { | ||
| 445 | if (!Common::FS::CreateDir(dir)) { | ||
| 446 | LOG_ERROR(Render_OpenGL, "Failed to create directory={}", | ||
| 447 | Common::FS::PathToUTF8String(dir)); | ||
| 448 | return false; | ||
| 449 | } | ||
| 450 | return true; | ||
| 451 | }; | ||
| 452 | |||
| 453 | return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && | ||
| 454 | CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && | ||
| 455 | CreateDir(GetPrecompiledDir()); | ||
| 456 | } | ||
| 457 | |||
| 458 | std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { | ||
| 459 | return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); | ||
| 460 | } | ||
| 461 | |||
| 462 | std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { | ||
| 463 | return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); | ||
| 464 | } | ||
| 465 | |||
| 466 | std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { | ||
| 467 | return GetBaseDir() / "transferable"; | ||
| 468 | } | ||
| 469 | |||
| 470 | std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { | ||
| 471 | return GetBaseDir() / "precompiled"; | ||
| 472 | } | ||
| 473 | |||
| 474 | std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { | ||
| 475 | return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; | ||
| 476 | } | ||
| 477 | |||
| 478 | std::string ShaderDiskCacheOpenGL::GetTitleID() const { | ||
| 479 | return fmt::format("{:016X}", title_id); | ||
| 480 | } | ||
| 481 | |||
| 482 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null | |||
| @@ -1,176 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <filesystem> | ||
| 8 | #include <optional> | ||
| 9 | #include <string> | ||
| 10 | #include <tuple> | ||
| 11 | #include <type_traits> | ||
| 12 | #include <unordered_map> | ||
| 13 | #include <unordered_set> | ||
| 14 | #include <utility> | ||
| 15 | #include <vector> | ||
| 16 | |||
| 17 | #include <glad/glad.h> | ||
| 18 | |||
| 19 | #include "common/assert.h" | ||
| 20 | #include "common/common_types.h" | ||
| 21 | #include "core/file_sys/vfs_vector.h" | ||
| 22 | #include "video_core/engines/shader_type.h" | ||
| 23 | #include "video_core/shader/registry.h" | ||
| 24 | |||
| 25 | namespace Common::FS { | ||
| 26 | class IOFile; | ||
| 27 | } | ||
| 28 | |||
| 29 | namespace OpenGL { | ||
| 30 | |||
| 31 | using ProgramCode = std::vector<u64>; | ||
| 32 | |||
| 33 | /// Describes a shader and how it's used by the guest GPU | ||
| 34 | struct ShaderDiskCacheEntry { | ||
| 35 | ShaderDiskCacheEntry(); | ||
| 36 | ~ShaderDiskCacheEntry(); | ||
| 37 | |||
| 38 | bool Load(Common::FS::IOFile& file); | ||
| 39 | |||
| 40 | bool Save(Common::FS::IOFile& file) const; | ||
| 41 | |||
| 42 | bool HasProgramA() const { | ||
| 43 | return !code.empty() && !code_b.empty(); | ||
| 44 | } | ||
| 45 | |||
| 46 | Tegra::Engines::ShaderType type{}; | ||
| 47 | ProgramCode code; | ||
| 48 | ProgramCode code_b; | ||
| 49 | |||
| 50 | u64 unique_identifier = 0; | ||
| 51 | std::optional<u32> texture_handler_size; | ||
| 52 | u32 bound_buffer = 0; | ||
| 53 | VideoCommon::Shader::GraphicsInfo graphics_info; | ||
| 54 | VideoCommon::Shader::ComputeInfo compute_info; | ||
| 55 | VideoCommon::Shader::KeyMap keys; | ||
| 56 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 57 | VideoCommon::Shader::SeparateSamplerMap separate_samplers; | ||
| 58 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 59 | }; | ||
| 60 | |||
| 61 | /// Contains an OpenGL dumped binary program | ||
| 62 | struct ShaderDiskCachePrecompiled { | ||
| 63 | u64 unique_identifier = 0; | ||
| 64 | GLenum binary_format = 0; | ||
| 65 | std::vector<u8> binary; | ||
| 66 | }; | ||
| 67 | |||
| 68 | class ShaderDiskCacheOpenGL { | ||
| 69 | public: | ||
| 70 | explicit ShaderDiskCacheOpenGL(); | ||
| 71 | ~ShaderDiskCacheOpenGL(); | ||
| 72 | |||
| 73 | /// Binds a title ID for all future operations. | ||
| 74 | void BindTitleID(u64 title_id); | ||
| 75 | |||
| 76 | /// Loads the transferable cache. If the file is from an old version or fails to load, it is deleted. | ||
| 77 | std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); | ||
| 78 | |||
| 79 | /// Loads current game's precompiled cache. Invalidates on failure. | ||
| 80 | std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); | ||
| 81 | |||
| 82 | /// Removes the transferable (and precompiled) cache file. | ||
| 83 | void InvalidateTransferable(); | ||
| 84 | |||
| 85 | /// Removes the precompiled cache file and clears virtual precompiled cache file. | ||
| 86 | void InvalidatePrecompiled(); | ||
| 87 | |||
| 88 | /// Saves a raw dump to the transferable file. Checks for collisions. | ||
| 89 | void SaveEntry(const ShaderDiskCacheEntry& entry); | ||
| 90 | |||
| 91 | /// Saves a dump entry to the precompiled file. Does not check for collisions. | ||
| 92 | void SavePrecompiled(u64 unique_identifier, GLuint program); | ||
| 93 | |||
| 94 | /// Serializes the virtual precompiled shader cache file to the real file | ||
| 95 | void SaveVirtualPrecompiledFile(); | ||
| 96 | |||
| 97 | private: | ||
| 98 | /// Loads the precompiled cache file. Returns empty on failure. | ||
| 99 | std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( | ||
| 100 | Common::FS::IOFile& file); | ||
| 101 | |||
| 102 | /// Opens the current game's transferable file and writes its header if it doesn't exist | ||
| 103 | Common::FS::IOFile AppendTransferableFile() const; | ||
| 104 | |||
| 105 | /// Saves the precompiled header to the virtual precompiled cache file | ||
| 106 | void SavePrecompiledHeaderToVirtualPrecompiledCache(); | ||
| 107 | |||
| 108 | /// Creates shader disk cache directories. Returns true on success. | ||
| 109 | bool EnsureDirectories() const; | ||
| 110 | |||
| 111 | /// Gets current game's transferable file path | ||
| 112 | std::filesystem::path GetTransferablePath() const; | ||
| 113 | |||
| 114 | /// Gets current game's precompiled file path | ||
| 115 | std::filesystem::path GetPrecompiledPath() const; | ||
| 116 | |||
| 117 | /// Gets the user's transferable directory path | ||
| 118 | std::filesystem::path GetTransferableDir() const; | ||
| 119 | |||
| 120 | /// Gets the user's precompiled directory path | ||
| 121 | std::filesystem::path GetPrecompiledDir() const; | ||
| 122 | |||
| 123 | /// Gets the user's shader directory path | ||
| 124 | std::filesystem::path GetBaseDir() const; | ||
| 125 | |||
| 126 | /// Gets the current game's title ID | ||
| 127 | std::string GetTitleID() const; | ||
| 128 | |||
| 129 | template <typename T> | ||
| 130 | bool SaveArrayToPrecompiled(const T* data, std::size_t length) { | ||
| 131 | const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( | ||
| 132 | data, length, precompiled_cache_virtual_file_offset); | ||
| 133 | precompiled_cache_virtual_file_offset += write_length; | ||
| 134 | return write_length == sizeof(T) * length; | ||
| 135 | } | ||
| 136 | |||
| 137 | template <typename T> | ||
| 138 | bool LoadArrayFromPrecompiled(T* data, std::size_t length) { | ||
| 139 | const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( | ||
| 140 | data, length, precompiled_cache_virtual_file_offset); | ||
| 141 | precompiled_cache_virtual_file_offset += read_length; | ||
| 142 | return read_length == sizeof(T) * length; | ||
| 143 | } | ||
| 144 | |||
| 145 | template <typename T> | ||
| 146 | bool SaveObjectToPrecompiled(const T& object) { | ||
| 147 | return SaveArrayToPrecompiled(&object, 1); | ||
| 148 | } | ||
| 149 | |||
| 150 | bool SaveObjectToPrecompiled(bool object) { | ||
| 151 | const auto value = static_cast<u8>(object); | ||
| 152 | return SaveArrayToPrecompiled(&value, 1); | ||
| 153 | } | ||
| 154 | |||
| 155 | template <typename T> | ||
| 156 | bool LoadObjectFromPrecompiled(T& object) { | ||
| 157 | return LoadArrayFromPrecompiled(&object, 1); | ||
| 158 | } | ||
| 159 | |||
| 160 | // Stores the whole precompiled cache, which is read from or saved to the precompiled | ||
| 161 | // cache file | ||
| 162 | FileSys::VectorVfsFile precompiled_cache_virtual_file; | ||
| 163 | // Stores the current offset of the precompiled cache file for IO purposes | ||
| 164 | std::size_t precompiled_cache_virtual_file_offset = 0; | ||
| 165 | |||
| 166 | // Stored transferable shaders | ||
| 167 | std::unordered_set<u64> stored_transferable; | ||
| 168 | |||
| 169 | /// Title ID to operate on | ||
| 170 | u64 title_id = 0; | ||
| 171 | |||
| 172 | // Whether the cache has been loaded at boot | ||
| 173 | bool is_usable = false; | ||
| 174 | }; | ||
| 175 | |||
| 176 | } // namespace OpenGL | ||
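The Save/Load templates at the bottom of this header advance a shared offset through the virtual file and treat any short read or write as failure; note the bool overload, which pins booleans to one byte so the on-disk width never depends on the compiler's sizeof(bool). The same pattern over a plain byte vector, with hypothetical names:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <vector>

struct ByteStream {
    std::vector<std::uint8_t> data;
    std::size_t offset = 0;

    template <typename T>
    bool WriteArray(const T* src, std::size_t length) {
        static_assert(std::is_trivially_copyable_v<T>);
        const std::size_t bytes = sizeof(T) * length;
        data.resize(std::max(data.size(), offset + bytes));
        std::memcpy(data.data() + offset, src, bytes);
        offset += bytes;
        return true;
    }

    template <typename T>
    bool WriteObject(const T& object) {
        return WriteArray(&object, 1);
    }

    // bool is narrowed to u8 so the serialized width is always one byte.
    bool WriteObject(bool object) {
        const std::uint8_t narrowed = object ? 1 : 0;
        return WriteArray(&narrowed, 1);
    }
};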
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c0d5c7f4 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp | |||
| @@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi | |||
| 323 | cmdbuf.SetScissor(0, scissor); | 323 | cmdbuf.SetScissor(0, scissor); |
| 324 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | 324 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); |
| 325 | } | 325 | } |
| 326 | |||
| 327 | } // Anonymous namespace | 326 | } // Anonymous namespace |
| 328 | 327 | ||
| 329 | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, | 328 | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..7a3660496 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -8,146 +8,14 @@ | |||
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 9 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 11 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 11 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 13 | #include "video_core/vulkan_common/vulkan_device.h" | 12 | #include "video_core/vulkan_common/vulkan_device.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 14 | ||
| 16 | namespace Vulkan { | 15 | namespace Vulkan { |
| 17 | 16 | ||
| 18 | VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, | 17 | ComputePipeline::ComputePipeline() = default; |
| 19 | VKDescriptorPool& descriptor_pool_, | ||
| 20 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 21 | const SPIRVShader& shader_) | ||
| 22 | : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, | ||
| 23 | descriptor_set_layout{CreateDescriptorSetLayout()}, | ||
| 24 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | ||
| 25 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | ||
| 26 | descriptor_template{CreateDescriptorUpdateTemplate()}, | ||
| 27 | shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} | ||
| 28 | 18 | ||
| 29 | VKComputePipeline::~VKComputePipeline() = default; | 19 | ComputePipeline::~ComputePipeline() = default; |
| 30 | |||
| 31 | VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { | ||
| 32 | if (!descriptor_template) { | ||
| 33 | return {}; | ||
| 34 | } | ||
| 35 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 36 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 37 | return set; | ||
| 38 | } | ||
| 39 | |||
| 40 | vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { | ||
| 41 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 42 | u32 binding = 0; | ||
| 43 | const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { | ||
| 44 | // TODO(Rodrigo): Maybe make individual bindings here? | ||
| 45 | for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { | ||
| 46 | bindings.push_back({ | ||
| 47 | .binding = binding++, | ||
| 48 | .descriptorType = descriptor_type, | ||
| 49 | .descriptorCount = 1, | ||
| 50 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 51 | .pImmutableSamplers = nullptr, | ||
| 52 | }); | ||
| 53 | } | ||
| 54 | }; | ||
| 55 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); | ||
| 56 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); | ||
| 57 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); | ||
| 58 | add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); | ||
| 59 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); | ||
| 60 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); | ||
| 61 | |||
| 62 | return device.GetLogical().CreateDescriptorSetLayout({ | ||
| 63 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 64 | .pNext = nullptr, | ||
| 65 | .flags = 0, | ||
| 66 | .bindingCount = static_cast<u32>(bindings.size()), | ||
| 67 | .pBindings = bindings.data(), | ||
| 68 | }); | ||
| 69 | } | ||
| 70 | |||
| 71 | vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { | ||
| 72 | return device.GetLogical().CreatePipelineLayout({ | ||
| 73 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 74 | .pNext = nullptr, | ||
| 75 | .flags = 0, | ||
| 76 | .setLayoutCount = 1, | ||
| 77 | .pSetLayouts = descriptor_set_layout.address(), | ||
| 78 | .pushConstantRangeCount = 0, | ||
| 79 | .pPushConstantRanges = nullptr, | ||
| 80 | }); | ||
| 81 | } | ||
| 82 | |||
| 83 | vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { | ||
| 84 | std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; | ||
| 85 | u32 binding = 0; | ||
| 86 | u32 offset = 0; | ||
| 87 | FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); | ||
| 88 | if (template_entries.empty()) { | ||
| 89 | // If the shader doesn't use descriptor sets, skip template creation. | ||
| 90 | return {}; | ||
| 91 | } | ||
| 92 | |||
| 93 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ | ||
| 94 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | ||
| 95 | .pNext = nullptr, | ||
| 96 | .flags = 0, | ||
| 97 | .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), | ||
| 98 | .pDescriptorUpdateEntries = template_entries.data(), | ||
| 99 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | ||
| 100 | .descriptorSetLayout = *descriptor_set_layout, | ||
| 101 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 102 | .pipelineLayout = *layout, | ||
| 103 | .set = DESCRIPTOR_SET, | ||
| 104 | }); | ||
| 105 | } | ||
| 106 | |||
| 107 | vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { | ||
| 108 | device.SaveShader(code); | ||
| 109 | |||
| 110 | return device.GetLogical().CreateShaderModule({ | ||
| 111 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | ||
| 112 | .pNext = nullptr, | ||
| 113 | .flags = 0, | ||
| 114 | .codeSize = code.size() * sizeof(u32), | ||
| 115 | .pCode = code.data(), | ||
| 116 | }); | ||
| 117 | } | ||
| 118 | |||
| 119 | vk::Pipeline VKComputePipeline::CreatePipeline() const { | ||
| 120 | |||
| 121 | VkComputePipelineCreateInfo ci{ | ||
| 122 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||
| 123 | .pNext = nullptr, | ||
| 124 | .flags = 0, | ||
| 125 | .stage = | ||
| 126 | { | ||
| 127 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 128 | .pNext = nullptr, | ||
| 129 | .flags = 0, | ||
| 130 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 131 | .module = *shader_module, | ||
| 132 | .pName = "main", | ||
| 133 | .pSpecializationInfo = nullptr, | ||
| 134 | }, | ||
| 135 | .layout = *layout, | ||
| 136 | .basePipelineHandle = nullptr, | ||
| 137 | .basePipelineIndex = 0, | ||
| 138 | }; | ||
| 139 | |||
| 140 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||
| 141 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | ||
| 142 | .pNext = nullptr, | ||
| 143 | .requiredSubgroupSize = GuestWarpSize, | ||
| 144 | }; | ||
| 145 | |||
| 146 | if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { | ||
| 147 | ci.stage.pNext = &subgroup_size_ci; | ||
| 148 | } | ||
| 149 | |||
| 150 | return device.GetLogical().CreateComputePipeline(ci); | ||
| 151 | } | ||
| 152 | 20 | ||
| 153 | } // namespace Vulkan | 21 | } // namespace Vulkan |
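The deleted CreatePipeline chained a VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT into the stage's pNext only when the device could honor the guest warp size. A reduced sketch of that conditional chaining (32 is illustrative; the real code uses yuzu's GuestWarpSize constant):

#include <vulkan/vulkan.h>

void ChainSubgroupSize(VkPipelineShaderStageCreateInfo& stage_ci,
                       VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT& subgroup_ci,
                       bool supports_required_subgroup_size) {
    subgroup_ci = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
        .pNext = nullptr,
        .requiredSubgroupSize = 32,
    };
    if (supports_required_subgroup_size) {
        // subgroup_ci must stay alive until vkCreate*Pipelines consumes the chain.
        stage_ci.pNext = &subgroup_ci;
    }
}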
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..433d8bb3d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 9 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 10 | ||
| 12 | namespace Vulkan { | 11 | namespace Vulkan { |
| @@ -15,50 +14,10 @@ class Device; | |||
| 15 | class VKScheduler; | 14 | class VKScheduler; |
| 16 | class VKUpdateDescriptorQueue; | 15 | class VKUpdateDescriptorQueue; |
| 17 | 16 | ||
| 18 | class VKComputePipeline final { | 17 | class ComputePipeline { |
| 19 | public: | 18 | public: |
| 20 | explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, | 19 | explicit ComputePipeline(); |
| 21 | VKDescriptorPool& descriptor_pool_, | 20 | ~ComputePipeline(); |
| 22 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 23 | const SPIRVShader& shader_); | ||
| 24 | ~VKComputePipeline(); | ||
| 25 | |||
| 26 | VkDescriptorSet CommitDescriptorSet(); | ||
| 27 | |||
| 28 | VkPipeline GetHandle() const { | ||
| 29 | return *pipeline; | ||
| 30 | } | ||
| 31 | |||
| 32 | VkPipelineLayout GetLayout() const { | ||
| 33 | return *layout; | ||
| 34 | } | ||
| 35 | |||
| 36 | const ShaderEntries& GetEntries() const { | ||
| 37 | return entries; | ||
| 38 | } | ||
| 39 | |||
| 40 | private: | ||
| 41 | vk::DescriptorSetLayout CreateDescriptorSetLayout() const; | ||
| 42 | |||
| 43 | vk::PipelineLayout CreatePipelineLayout() const; | ||
| 44 | |||
| 45 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; | ||
| 46 | |||
| 47 | vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; | ||
| 48 | |||
| 49 | vk::Pipeline CreatePipeline() const; | ||
| 50 | |||
| 51 | const Device& device; | ||
| 52 | VKScheduler& scheduler; | ||
| 53 | ShaderEntries entries; | ||
| 54 | |||
| 55 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 56 | DescriptorAllocator descriptor_allocator; | ||
| 57 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 58 | vk::PipelineLayout layout; | ||
| 59 | vk::DescriptorUpdateTemplateKHR descriptor_template; | ||
| 60 | vk::ShaderModule shader_module; | ||
| 61 | vk::Pipeline pipeline; | ||
| 62 | }; | 21 | }; |
| 63 | 22 | ||
| 64 | } // namespace Vulkan | 23 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp deleted file mode 100644 index fc6dd83eb..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ /dev/null | |||
| @@ -1,484 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstring> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/microprofile.h" | ||
| 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 13 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 20 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 21 | |||
| 22 | namespace Vulkan { | ||
| 23 | |||
| 24 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | ||
| 25 | |||
| 26 | namespace { | ||
| 27 | |||
| 28 | template <class StencilFace> | ||
| 29 | VkStencilOpState GetStencilFaceState(const StencilFace& face) { | ||
| 30 | return { | ||
| 31 | .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), | ||
| 32 | .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), | ||
| 33 | .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), | ||
| 34 | .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), | ||
| 35 | .compareMask = 0, | ||
| 36 | .writeMask = 0, | ||
| 37 | .reference = 0, | ||
| 38 | }; | ||
| 39 | } | ||
| 40 | |||
| 41 | bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { | ||
| 42 | static constexpr std::array unsupported_topologies = { | ||
| 43 | VK_PRIMITIVE_TOPOLOGY_POINT_LIST, | ||
| 44 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, | ||
| 45 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, | ||
| 46 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, | ||
| 47 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, | ||
| 48 | VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; | ||
| 49 | return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), | ||
| 50 | topology) == std::end(unsupported_topologies); | ||
| 51 | } | ||
| 52 | |||
| 53 | VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | ||
| 54 | union Swizzle { | ||
| 55 | u32 raw; | ||
| 56 | BitField<0, 3, Maxwell::ViewportSwizzle> x; | ||
| 57 | BitField<4, 3, Maxwell::ViewportSwizzle> y; | ||
| 58 | BitField<8, 3, Maxwell::ViewportSwizzle> z; | ||
| 59 | BitField<12, 3, Maxwell::ViewportSwizzle> w; | ||
| 60 | }; | ||
| 61 | const Swizzle unpacked{swizzle}; | ||
| 62 | |||
| 63 | return { | ||
| 64 | .x = MaxwellToVK::ViewportSwizzle(unpacked.x), | ||
| 65 | .y = MaxwellToVK::ViewportSwizzle(unpacked.y), | ||
| 66 | .z = MaxwellToVK::ViewportSwizzle(unpacked.z), | ||
| 67 | .w = MaxwellToVK::ViewportSwizzle(unpacked.w), | ||
| 68 | }; | ||
| 69 | } | ||
| 70 | |||
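Stripped of the BitField helper, UnpackViewportSwizzle above is four 3-bit fields at bit offsets 0, 4, 8 and 12 of the packed u16:

#include <cstdint>

struct UnpackedSwizzle {
    std::uint8_t x, y, z, w;
};

constexpr UnpackedSwizzle Unpack(std::uint16_t swizzle) {
    const auto field = [swizzle](unsigned bit) {
        return static_cast<std::uint8_t>((swizzle >> bit) & 0b111);
    };
    return {field(0), field(4), field(8), field(12)};
}

static_assert(Unpack(0b0101'0100'0011'0010).w == 0b101);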
| 71 | VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { | ||
| 72 | switch (msaa_mode) { | ||
| 73 | case Tegra::Texture::MsaaMode::Msaa1x1: | ||
| 74 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 75 | case Tegra::Texture::MsaaMode::Msaa2x1: | ||
| 76 | case Tegra::Texture::MsaaMode::Msaa2x1_D3D: | ||
| 77 | return VK_SAMPLE_COUNT_2_BIT; | ||
| 78 | case Tegra::Texture::MsaaMode::Msaa2x2: | ||
| 79 | case Tegra::Texture::MsaaMode::Msaa2x2_VC4: | ||
| 80 | case Tegra::Texture::MsaaMode::Msaa2x2_VC12: | ||
| 81 | return VK_SAMPLE_COUNT_4_BIT; | ||
| 82 | case Tegra::Texture::MsaaMode::Msaa4x2: | ||
| 83 | case Tegra::Texture::MsaaMode::Msaa4x2_D3D: | ||
| 84 | case Tegra::Texture::MsaaMode::Msaa4x2_VC8: | ||
| 85 | case Tegra::Texture::MsaaMode::Msaa4x2_VC24: | ||
| 86 | return VK_SAMPLE_COUNT_8_BIT; | ||
| 87 | case Tegra::Texture::MsaaMode::Msaa4x4: | ||
| 88 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 89 | default: | ||
| 90 | UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); | ||
| 91 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | } // Anonymous namespace | ||
| 96 | |||
| 97 | VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, | ||
| 98 | VKDescriptorPool& descriptor_pool_, | ||
| 99 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 100 | const GraphicsPipelineCacheKey& key, | ||
| 101 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | ||
| 102 | const SPIRVProgram& program, u32 num_color_buffers) | ||
| 103 | : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, | ||
| 104 | descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, | ||
| 105 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | ||
| 106 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | ||
| 107 | descriptor_template{CreateDescriptorUpdateTemplate(program)}, | ||
| 108 | modules(CreateShaderModules(program)), | ||
| 109 | pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} | ||
| 110 | |||
| 111 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; | ||
| 112 | |||
| 113 | VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { | ||
| 114 | if (!descriptor_template) { | ||
| 115 | return {}; | ||
| 116 | } | ||
| 117 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 118 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 119 | return set; | ||
| 120 | } | ||
| 121 | |||
| 122 | vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( | ||
| 123 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const { | ||
| 124 | const VkDescriptorSetLayoutCreateInfo ci{ | ||
| 125 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 126 | .pNext = nullptr, | ||
| 127 | .flags = 0, | ||
| 128 | .bindingCount = bindings.size(), | ||
| 129 | .pBindings = bindings.data(), | ||
| 130 | }; | ||
| 131 | return device.GetLogical().CreateDescriptorSetLayout(ci); | ||
| 132 | } | ||
| 133 | |||
| 134 | vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { | ||
| 135 | const VkPipelineLayoutCreateInfo ci{ | ||
| 136 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 137 | .pNext = nullptr, | ||
| 138 | .flags = 0, | ||
| 139 | .setLayoutCount = 1, | ||
| 140 | .pSetLayouts = descriptor_set_layout.address(), | ||
| 141 | .pushConstantRangeCount = 0, | ||
| 142 | .pPushConstantRanges = nullptr, | ||
| 143 | }; | ||
| 144 | return device.GetLogical().CreatePipelineLayout(ci); | ||
| 145 | } | ||
| 146 | |||
| 147 | vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( | ||
| 148 | const SPIRVProgram& program) const { | ||
| 149 | std::vector<VkDescriptorUpdateTemplateEntry> template_entries; | ||
| 150 | u32 binding = 0; | ||
| 151 | u32 offset = 0; | ||
| 152 | for (const auto& stage : program) { | ||
| 153 | if (stage) { | ||
| 154 | FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); | ||
| 155 | } | ||
| 156 | } | ||
| 157 | if (template_entries.empty()) { | ||
| 158 | // If the shader doesn't use descriptor sets, skip template creation. | ||
| 159 | return {}; | ||
| 160 | } | ||
| 161 | |||
| 162 | const VkDescriptorUpdateTemplateCreateInfoKHR ci{ | ||
| 163 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | ||
| 164 | .pNext = nullptr, | ||
| 165 | .flags = 0, | ||
| 166 | .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), | ||
| 167 | .pDescriptorUpdateEntries = template_entries.data(), | ||
| 168 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | ||
| 169 | .descriptorSetLayout = *descriptor_set_layout, | ||
| 170 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 171 | .pipelineLayout = *layout, | ||
| 172 | .set = DESCRIPTOR_SET, | ||
| 173 | }; | ||
| 174 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); | ||
| 175 | } | ||
| 176 | |||
| 177 | std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | ||
| 178 | const SPIRVProgram& program) const { | ||
| 179 | VkShaderModuleCreateInfo ci{ | ||
| 180 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | ||
| 181 | .pNext = nullptr, | ||
| 182 | .flags = 0, | ||
| 183 | .codeSize = 0, | ||
| 184 | .pCode = nullptr, | ||
| 185 | }; | ||
| 186 | |||
| 187 | std::vector<vk::ShaderModule> shader_modules; | ||
| 188 | shader_modules.reserve(Maxwell::MaxShaderStage); | ||
| 189 | for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { | ||
| 190 | const auto& stage = program[i]; | ||
| 191 | if (!stage) { | ||
| 192 | continue; | ||
| 193 | } | ||
| 194 | |||
| 195 | device.SaveShader(stage->code); | ||
| 196 | |||
| 197 | ci.codeSize = stage->code.size() * sizeof(u32); | ||
| 198 | ci.pCode = stage->code.data(); | ||
| 199 | shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); | ||
| 200 | } | ||
| 201 | return shader_modules; | ||
| 202 | } | ||
| 203 | |||
| 204 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | ||
| 205 | VkRenderPass renderpass, | ||
| 206 | u32 num_color_buffers) const { | ||
| 207 | const auto& state = cache_key.fixed_state; | ||
| 208 | const auto& viewport_swizzles = state.viewport_swizzles; | ||
| 209 | |||
| 210 | FixedPipelineState::DynamicState dynamic; | ||
| 211 | if (device.IsExtExtendedDynamicStateSupported()) { | ||
| 212 | // Insert dummy values; as long as they are valid they don't matter, since these | ||
| 213 | // fields are overridden by extended dynamic state | ||
| 214 | dynamic.raw1 = 0; | ||
| 215 | dynamic.raw2 = 0; | ||
| 216 | dynamic.vertex_strides.fill(0); | ||
| 217 | } else { | ||
| 218 | dynamic = state.dynamic_state; | ||
| 219 | } | ||
| 220 | |||
| 221 | std::vector<VkVertexInputBindingDescription> vertex_bindings; | ||
| 222 | std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; | ||
| 223 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 224 | const bool instanced = state.binding_divisors[index] != 0; | ||
| 225 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; | ||
| 226 | vertex_bindings.push_back({ | ||
| 227 | .binding = static_cast<u32>(index), | ||
| 228 | .stride = dynamic.vertex_strides[index], | ||
| 229 | .inputRate = rate, | ||
| 230 | }); | ||
| 231 | if (instanced) { | ||
| 232 | vertex_binding_divisors.push_back({ | ||
| 233 | .binding = static_cast<u32>(index), | ||
| 234 | .divisor = state.binding_divisors[index], | ||
| 235 | }); | ||
| 236 | } | ||
| 237 | } | ||
| 238 | |||
| 239 | std::vector<VkVertexInputAttributeDescription> vertex_attributes; | ||
| 240 | const auto& input_attributes = program[0]->entries.attributes; | ||
| 241 | for (std::size_t index = 0; index < state.attributes.size(); ++index) { | ||
| 242 | const auto& attribute = state.attributes[index]; | ||
| 243 | if (!attribute.enabled) { | ||
| 244 | continue; | ||
| 245 | } | ||
| 246 | if (!input_attributes.contains(static_cast<u32>(index))) { | ||
| 247 | // Skip attributes not used by the vertex shaders. | ||
| 248 | continue; | ||
| 249 | } | ||
| 250 | vertex_attributes.push_back({ | ||
| 251 | .location = static_cast<u32>(index), | ||
| 252 | .binding = attribute.buffer, | ||
| 253 | .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), | ||
| 254 | .offset = attribute.offset, | ||
| 255 | }); | ||
| 256 | } | ||
| 257 | |||
| 258 | VkPipelineVertexInputStateCreateInfo vertex_input_ci{ | ||
| 259 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 260 | .pNext = nullptr, | ||
| 261 | .flags = 0, | ||
| 262 | .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()), | ||
| 263 | .pVertexBindingDescriptions = vertex_bindings.data(), | ||
| 264 | .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), | ||
| 265 | .pVertexAttributeDescriptions = vertex_attributes.data(), | ||
| 266 | }; | ||
| 267 | |||
| 268 | const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ | ||
| 269 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, | ||
| 270 | .pNext = nullptr, | ||
| 271 | .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()), | ||
| 272 | .pVertexBindingDivisors = vertex_binding_divisors.data(), | ||
| 273 | }; | ||
| 274 | if (!vertex_binding_divisors.empty()) { | ||
| 275 | vertex_input_ci.pNext = &input_divisor_ci; | ||
| 276 | } | ||
| 277 | |||
| 278 | const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); | ||
| 279 | const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ | ||
| 280 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 281 | .pNext = nullptr, | ||
| 282 | .flags = 0, | ||
| 283 | .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), | ||
| 284 | .primitiveRestartEnable = state.primitive_restart_enable != 0 && | ||
| 285 | SupportsPrimitiveRestart(input_assembly_topology), | ||
| 286 | }; | ||
| 287 | |||
| 288 | const VkPipelineTessellationStateCreateInfo tessellation_ci{ | ||
| 289 | .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, | ||
| 290 | .pNext = nullptr, | ||
| 291 | .flags = 0, | ||
| 292 | .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, | ||
| 293 | }; | ||
| 294 | |||
| 295 | VkPipelineViewportStateCreateInfo viewport_ci{ | ||
| 296 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 297 | .pNext = nullptr, | ||
| 298 | .flags = 0, | ||
| 299 | .viewportCount = Maxwell::NumViewports, | ||
| 300 | .pViewports = nullptr, | ||
| 301 | .scissorCount = Maxwell::NumViewports, | ||
| 302 | .pScissors = nullptr, | ||
| 303 | }; | ||
| 304 | |||
| 305 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; | ||
| 306 | std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); | ||
| 307 | VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ | ||
| 308 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, | ||
| 309 | .pNext = nullptr, | ||
| 310 | .flags = 0, | ||
| 311 | .viewportCount = Maxwell::NumViewports, | ||
| 312 | .pViewportSwizzles = swizzles.data(), | ||
| 313 | }; | ||
| 314 | if (device.IsNvViewportSwizzleSupported()) { | ||
| 315 | viewport_ci.pNext = &swizzle_ci; | ||
| 316 | } | ||
| 317 | |||
| 318 | const VkPipelineRasterizationStateCreateInfo rasterization_ci{ | ||
| 319 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 320 | .pNext = nullptr, | ||
| 321 | .flags = 0, | ||
| 322 | .depthClampEnable = | ||
| 323 | static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), | ||
| 324 | .rasterizerDiscardEnable = | ||
| 325 | static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), | ||
| 326 | .polygonMode = VK_POLYGON_MODE_FILL, | ||
| 327 | .cullMode = static_cast<VkCullModeFlags>( | ||
| 328 | dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), | ||
| 329 | .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), | ||
| 330 | .depthBiasEnable = state.depth_bias_enable, | ||
| 331 | .depthBiasConstantFactor = 0.0f, | ||
| 332 | .depthBiasClamp = 0.0f, | ||
| 333 | .depthBiasSlopeFactor = 0.0f, | ||
| 334 | .lineWidth = 1.0f, | ||
| 335 | }; | ||
| 336 | |||
| 337 | const VkPipelineMultisampleStateCreateInfo multisample_ci{ | ||
| 338 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 339 | .pNext = nullptr, | ||
| 340 | .flags = 0, | ||
| 341 | .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), | ||
| 342 | .sampleShadingEnable = VK_FALSE, | ||
| 343 | .minSampleShading = 0.0f, | ||
| 344 | .pSampleMask = nullptr, | ||
| 345 | .alphaToCoverageEnable = VK_FALSE, | ||
| 346 | .alphaToOneEnable = VK_FALSE, | ||
| 347 | }; | ||
| 348 | |||
| 349 | const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ | ||
| 350 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | ||
| 351 | .pNext = nullptr, | ||
| 352 | .flags = 0, | ||
| 353 | .depthTestEnable = dynamic.depth_test_enable, | ||
| 354 | .depthWriteEnable = dynamic.depth_write_enable, | ||
| 355 | .depthCompareOp = dynamic.depth_test_enable | ||
| 356 | ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) | ||
| 357 | : VK_COMPARE_OP_ALWAYS, | ||
| 358 | .depthBoundsTestEnable = dynamic.depth_bounds_enable, | ||
| 359 | .stencilTestEnable = dynamic.stencil_enable, | ||
| 360 | .front = GetStencilFaceState(dynamic.front), | ||
| 361 | .back = GetStencilFaceState(dynamic.back), | ||
| 362 | .minDepthBounds = 0.0f, | ||
| 363 | .maxDepthBounds = 0.0f, | ||
| 364 | }; | ||
| 365 | |||
| 366 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | ||
| 367 | for (std::size_t index = 0; index < num_color_buffers; ++index) { | ||
| 368 | static constexpr std::array COMPONENT_TABLE{ | ||
| 369 | VK_COLOR_COMPONENT_R_BIT, | ||
| 370 | VK_COLOR_COMPONENT_G_BIT, | ||
| 371 | VK_COLOR_COMPONENT_B_BIT, | ||
| 372 | VK_COLOR_COMPONENT_A_BIT, | ||
| 373 | }; | ||
| 374 | const auto& blend = state.attachments[index]; | ||
| 375 | |||
| 376 | VkColorComponentFlags color_components = 0; | ||
| 377 | for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { | ||
| 378 | if (blend.Mask()[i]) { | ||
| 379 | color_components |= COMPONENT_TABLE[i]; | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | cb_attachments[index] = { | ||
| 384 | .blendEnable = blend.enable != 0, | ||
| 385 | .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), | ||
| 386 | .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), | ||
| 387 | .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), | ||
| 388 | .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), | ||
| 389 | .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), | ||
| 390 | .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), | ||
| 391 | .colorWriteMask = color_components, | ||
| 392 | }; | ||
| 393 | } | ||
| 394 | |||
| 395 | const VkPipelineColorBlendStateCreateInfo color_blend_ci{ | ||
| 396 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 397 | .pNext = nullptr, | ||
| 398 | .flags = 0, | ||
| 399 | .logicOpEnable = VK_FALSE, | ||
| 400 | .logicOp = VK_LOGIC_OP_COPY, | ||
| 401 | .attachmentCount = num_color_buffers, | ||
| 402 | .pAttachments = cb_attachments.data(), | ||
| 403 | .blendConstants = {}, | ||
| 404 | }; | ||
| 405 | |||
| 406 | std::vector dynamic_states{ | ||
| 407 | VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, | ||
| 408 | VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, | ||
| 409 | VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, | ||
| 410 | VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, | ||
| 411 | }; | ||
| 412 | if (device.IsExtExtendedDynamicStateSupported()) { | ||
| 413 | static constexpr std::array extended{ | ||
| 414 | VK_DYNAMIC_STATE_CULL_MODE_EXT, | ||
| 415 | VK_DYNAMIC_STATE_FRONT_FACE_EXT, | ||
| 416 | VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, | ||
| 417 | VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, | ||
| 418 | VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, | ||
| 419 | VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, | ||
| 420 | VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, | ||
| 421 | VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, | ||
| 422 | VK_DYNAMIC_STATE_STENCIL_OP_EXT, | ||
| 423 | }; | ||
| 424 | dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); | ||
| 425 | } | ||
| 426 | |||
| 427 | const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ | ||
| 428 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 429 | .pNext = nullptr, | ||
| 430 | .flags = 0, | ||
| 431 | .dynamicStateCount = static_cast<u32>(dynamic_states.size()), | ||
| 432 | .pDynamicStates = dynamic_states.data(), | ||
| 433 | }; | ||
| 434 | |||
| 435 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||
| 436 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | ||
| 437 | .pNext = nullptr, | ||
| 438 | .requiredSubgroupSize = GuestWarpSize, | ||
| 439 | }; | ||
| 440 | |||
| 441 | std::vector<VkPipelineShaderStageCreateInfo> shader_stages; | ||
| 442 | std::size_t module_index = 0; | ||
| 443 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 444 | if (!program[stage]) { | ||
| 445 | continue; | ||
| 446 | } | ||
| 447 | |||
| 448 | VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); | ||
| 449 | stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; | ||
| 450 | stage_ci.pNext = nullptr; | ||
| 451 | stage_ci.flags = 0; | ||
| 452 | stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); | ||
| 453 | stage_ci.module = *modules[module_index++]; | ||
| 454 | stage_ci.pName = "main"; | ||
| 455 | stage_ci.pSpecializationInfo = nullptr; | ||
| 456 | |||
| 457 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { | ||
| 458 | stage_ci.pNext = &subgroup_size_ci; | ||
| 459 | } | ||
| 460 | } | ||
| 461 | return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ | ||
| 462 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 463 | .pNext = nullptr, | ||
| 464 | .flags = 0, | ||
| 465 | .stageCount = static_cast<u32>(shader_stages.size()), | ||
| 466 | .pStages = shader_stages.data(), | ||
| 467 | .pVertexInputState = &vertex_input_ci, | ||
| 468 | .pInputAssemblyState = &input_assembly_ci, | ||
| 469 | .pTessellationState = &tessellation_ci, | ||
| 470 | .pViewportState = &viewport_ci, | ||
| 471 | .pRasterizationState = &rasterization_ci, | ||
| 472 | .pMultisampleState = &multisample_ci, | ||
| 473 | .pDepthStencilState = &depth_stencil_ci, | ||
| 474 | .pColorBlendState = &color_blend_ci, | ||
| 475 | .pDynamicState = &dynamic_state_ci, | ||
| 476 | .layout = *layout, | ||
| 477 | .renderPass = renderpass, | ||
| 478 | .subpass = 0, | ||
| 479 | .basePipelineHandle = nullptr, | ||
| 480 | .basePipelineIndex = 0, | ||
| 481 | }); | ||
| 482 | } | ||
| 483 | |||
| 484 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h deleted file mode 100644 index 8b6a98fe0..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ /dev/null | |||
| @@ -1,103 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 17 | |||
| 18 | namespace Vulkan { | ||
| 19 | |||
| 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 21 | |||
| 22 | struct GraphicsPipelineCacheKey { | ||
| 23 | VkRenderPass renderpass; | ||
| 24 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; | ||
| 25 | FixedPipelineState fixed_state; | ||
| 26 | |||
| 27 | std::size_t Hash() const noexcept; | ||
| 28 | |||
| 29 | bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; | ||
| 30 | |||
| 31 | bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { | ||
| 32 | return !operator==(rhs); | ||
| 33 | } | ||
| 34 | |||
| 35 | std::size_t Size() const noexcept { | ||
| 36 | return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); | ||
| 37 | } | ||
| 38 | }; | ||
| 39 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); | ||
| 40 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); | ||
| 41 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); | ||
| 42 | |||
| 43 | class Device; | ||
| 44 | class VKDescriptorPool; | ||
| 45 | class VKScheduler; | ||
| 46 | class VKUpdateDescriptorQueue; | ||
| 47 | |||
| 48 | using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; | ||
| 49 | |||
| 50 | class VKGraphicsPipeline final { | ||
| 51 | public: | ||
| 52 | explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, | ||
| 53 | VKDescriptorPool& descriptor_pool, | ||
| 54 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 55 | const GraphicsPipelineCacheKey& key, | ||
| 56 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | ||
| 57 | const SPIRVProgram& program, u32 num_color_buffers); | ||
| 58 | ~VKGraphicsPipeline(); | ||
| 59 | |||
| 60 | VkDescriptorSet CommitDescriptorSet(); | ||
| 61 | |||
| 62 | VkPipeline GetHandle() const { | ||
| 63 | return *pipeline; | ||
| 64 | } | ||
| 65 | |||
| 66 | VkPipelineLayout GetLayout() const { | ||
| 67 | return *layout; | ||
| 68 | } | ||
| 69 | |||
| 70 | GraphicsPipelineCacheKey GetCacheKey() const { | ||
| 71 | return cache_key; | ||
| 72 | } | ||
| 73 | |||
| 74 | private: | ||
| 75 | vk::DescriptorSetLayout CreateDescriptorSetLayout( | ||
| 76 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const; | ||
| 77 | |||
| 78 | vk::PipelineLayout CreatePipelineLayout() const; | ||
| 79 | |||
| 80 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( | ||
| 81 | const SPIRVProgram& program) const; | ||
| 82 | |||
| 83 | std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; | ||
| 84 | |||
| 85 | vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, | ||
| 86 | u32 num_color_buffers) const; | ||
| 87 | |||
| 88 | const Device& device; | ||
| 89 | VKScheduler& scheduler; | ||
| 90 | const GraphicsPipelineCacheKey cache_key; | ||
| 91 | const u64 hash; | ||
| 92 | |||
| 93 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 94 | DescriptorAllocator descriptor_allocator; | ||
| 95 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 96 | vk::PipelineLayout layout; | ||
| 97 | vk::DescriptorUpdateTemplateKHR descriptor_template; | ||
| 98 | std::vector<vk::ShaderModule> modules; | ||
| 99 | |||
| 100 | vk::Pipeline pipeline; | ||
| 101 | }; | ||
| 102 | |||
| 103 | } // namespace Vulkan | ||
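The three static_asserts above are what make the key hashable and comparable as raw bytes (CityHash64 and memcmp, as the pipeline cache implementation below shows): no padding, unique object representations, trivial copyability. A sketch of the idea, with FNV-1a standing in for CityHash64; note that the real GraphicsPipelineCacheKey hashes only its first Size() bytes because fixed_state is variable-length:

#include <cstddef>
#include <cstdint>
#include <type_traits>

template <typename Key>
std::size_t HashBytes(const Key& key) noexcept {
    static_assert(std::has_unique_object_representations_v<Key>);
    std::uint64_t hash = 0xcbf29ce484222325ULL; // FNV offset basis
    const auto* bytes = reinterpret_cast<const unsigned char*>(&key);
    for (std::size_t i = 0; i < sizeof(Key); ++i) {
        hash = (hash ^ bytes[i]) * 0x100000001b3ULL; // FNV prime
    }
    return static_cast<std::size_t>(hash);
}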
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..7d0ba1180 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -19,49 +19,27 @@ | |||
| 19 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 19 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 20 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 20 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 22 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 23 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 24 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 26 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 25 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 27 | #include "video_core/shader/compiler_settings.h" | ||
| 28 | #include "video_core/shader/memory_util.h" | ||
| 29 | #include "video_core/shader_cache.h" | 26 | #include "video_core/shader_cache.h" |
| 30 | #include "video_core/shader_notify.h" | 27 | #include "video_core/shader_notify.h" |
| 31 | #include "video_core/vulkan_common/vulkan_device.h" | 28 | #include "video_core/vulkan_common/vulkan_device.h" |
| 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 29 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 33 | 30 | ||
| 34 | namespace Vulkan { | 31 | namespace Vulkan { |
| 35 | |||
| 36 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | 32 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); |
| 37 | 33 | ||
| 38 | using Tegra::Engines::ShaderType; | 34 | using Tegra::Engines::ShaderType; |
| 39 | using VideoCommon::Shader::GetShaderAddress; | ||
| 40 | using VideoCommon::Shader::GetShaderCode; | ||
| 41 | using VideoCommon::Shader::KERNEL_MAIN_OFFSET; | ||
| 42 | using VideoCommon::Shader::ProgramCode; | ||
| 43 | using VideoCommon::Shader::STAGE_MAIN_OFFSET; | ||
| 44 | 35 | ||
| 45 | namespace { | 36 | namespace { |
| 46 | 37 | size_t StageFromProgram(size_t program) { | |
| 47 | constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; | ||
| 48 | constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | ||
| 49 | constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; | ||
| 50 | constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; | ||
| 51 | constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; | ||
| 52 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; | ||
| 53 | |||
| 54 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | ||
| 55 | .depth = VideoCommon::Shader::CompileDepth::FullDecompile, | ||
| 56 | .disable_else_derivation = true, | ||
| 57 | }; | ||
| 58 | |||
| 59 | constexpr std::size_t GetStageFromProgram(std::size_t program) { | ||
| 60 | return program == 0 ? 0 : program - 1; | 38 | return program == 0 ? 0 : program - 1; |
| 61 | } | 39 | } |
| 62 | 40 | ||
| 63 | constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { | 41 | ShaderType StageFromProgram(Maxwell::ShaderProgram program) { |
| 64 | return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); | 42 | return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program))); |
| 65 | } | 43 | } |
| 66 | 44 | ||
| 67 | ShaderType GetShaderType(Maxwell::ShaderProgram program) { | 45 | ShaderType GetShaderType(Maxwell::ShaderProgram program) { |
| @@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { | |||
| 81 | return ShaderType::Vertex; | 59 | return ShaderType::Vertex; |
| 82 | } | 60 | } |
| 83 | } | 61 | } |
| 84 | |||
| 85 | template <VkDescriptorType descriptor_type, class Container> | ||
| 86 | void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, | ||
| 87 | VkShaderStageFlags stage_flags, const Container& container) { | ||
| 88 | const u32 num_entries = static_cast<u32>(std::size(container)); | ||
| 89 | for (std::size_t i = 0; i < num_entries; ++i) { | ||
| 90 | u32 count = 1; | ||
| 91 | if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { | ||
| 92 | // Combined image samplers can be arrayed. | ||
| 93 | count = container[i].size; | ||
| 94 | } | ||
| 95 | bindings.push_back({ | ||
| 96 | .binding = binding++, | ||
| 97 | .descriptorType = descriptor_type, | ||
| 98 | .descriptorCount = count, | ||
| 99 | .stageFlags = stage_flags, | ||
| 100 | .pImmutableSamplers = nullptr, | ||
| 101 | }); | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | u32 FillDescriptorLayout(const ShaderEntries& entries, | ||
| 106 | std::vector<VkDescriptorSetLayoutBinding>& bindings, | ||
| 107 | Maxwell::ShaderProgram program_type, u32 base_binding) { | ||
| 108 | const ShaderType stage = GetStageFromProgram(program_type); | ||
| 109 | const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); | ||
| 110 | |||
| 111 | u32 binding = base_binding; | ||
| 112 | AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); | ||
| 113 | AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); | ||
| 114 | AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); | ||
| 115 | AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); | ||
| 116 | AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); | ||
| 117 | AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); | ||
| 118 | return binding; | ||
| 119 | } | ||
| 120 | |||
| 121 | } // Anonymous namespace | 62 | } // Anonymous namespace |
| 122 | 63 | ||
| 123 | std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { | 64 | size_t ComputePipelineCacheKey::Hash() const noexcept { |
| 124 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); | ||
| 125 | return static_cast<std::size_t>(hash); | ||
| 126 | } | ||
| 127 | |||
| 128 | bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { | ||
| 129 | return std::memcmp(&rhs, this, Size()) == 0; | ||
| 130 | } | ||
| 131 | |||
| 132 | std::size_t ComputePipelineCacheKey::Hash() const noexcept { | ||
| 133 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); | 65 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); |
| 134 | return static_cast<std::size_t>(hash); | 66 | return static_cast<size_t>(hash); |
| 135 | } | 67 | } |
| 136 | 68 | ||
| 137 | bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { | 69 | bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { |
| 138 | return std::memcmp(&rhs, this, sizeof *this) == 0; | 70 | return std::memcmp(&rhs, this, sizeof *this) == 0; |
| 139 | } | 71 | } |
| 140 | 72 | ||
| 141 | Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, | 73 | Shader::Shader() = default; |
| 142 | GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) | ||
| 143 | : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), | ||
| 144 | shader_ir(program_code, main_offset_, compiler_settings, registry), | ||
| 145 | entries(GenerateShaderEntries(shader_ir)) {} | ||
| 146 | 74 | ||
| 147 | Shader::~Shader() = default; | 75 | Shader::~Shader() = default; |
| 148 | 76 | ||
| 149 | VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | 77 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, |
| 150 | Tegra::Engines::Maxwell3D& maxwell3d_, | 78 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 151 | Tegra::Engines::KeplerCompute& kepler_compute_, | 79 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 152 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | 80 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 153 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | 81 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, |
| 154 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 82 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 155 | : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, | 83 | : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, |
| 156 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, | 84 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, |
| 157 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ | 85 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ |
| 158 | update_descriptor_queue_} {} | 86 | update_descriptor_queue_} {} |
| 159 | 87 | ||
| 160 | VKPipelineCache::~VKPipelineCache() = default; | 88 | PipelineCache::~PipelineCache() = default; |
| 161 | |||
| 162 | std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | ||
| 163 | std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; | ||
| 164 | |||
| 165 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 166 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 167 | |||
| 168 | // Skip stages that are not enabled | ||
| 169 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 170 | continue; | ||
| 171 | } | ||
| 172 | |||
| 173 | const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; | ||
| 174 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 175 | ASSERT(cpu_addr); | ||
| 176 | |||
| 177 | Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); | ||
| 178 | if (!result) { | ||
| 179 | const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; | ||
| 180 | |||
| 181 | // No shader found - create a new one | ||
| 182 | static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; | ||
| 183 | const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); | ||
| 184 | ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); | ||
| 185 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||
| 186 | |||
| 187 | auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, | ||
| 188 | std::move(code), stage_offset); | ||
| 189 | result = shader.get(); | ||
| 190 | |||
| 191 | if (cpu_addr) { | ||
| 192 | Register(std::move(shader), *cpu_addr, size_in_bytes); | ||
| 193 | } else { | ||
| 194 | null_shader = std::move(shader); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | shaders[index] = result; | ||
| 198 | } | ||
| 199 | return last_shaders = shaders; | ||
| 200 | } | ||
| 201 | |||
| 202 | VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | ||
| 203 | const GraphicsPipelineCacheKey& key, u32 num_color_buffers, | ||
| 204 | VideoCommon::Shader::AsyncShaders& async_shaders) { | ||
| 205 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | ||
| 206 | |||
| 207 | if (last_graphics_pipeline && last_graphics_key == key) { | ||
| 208 | return last_graphics_pipeline; | ||
| 209 | } | ||
| 210 | last_graphics_key = key; | ||
| 211 | |||
| 212 | if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { | ||
| 213 | std::unique_lock lock{pipeline_cache}; | ||
| 214 | const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | ||
| 215 | if (is_cache_miss) { | ||
| 216 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 217 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||
| 218 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | ||
| 219 | async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, | ||
| 220 | update_descriptor_queue, bindings, program, key, | ||
| 221 | num_color_buffers); | ||
| 222 | } | ||
| 223 | last_graphics_pipeline = pair->second.get(); | ||
| 224 | return last_graphics_pipeline; | ||
| 225 | } | ||
| 226 | |||
| 227 | const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | ||
| 228 | auto& entry = pair->second; | ||
| 229 | if (is_cache_miss) { | ||
| 230 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 231 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||
| 232 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | ||
| 233 | entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, | ||
| 234 | update_descriptor_queue, key, bindings, | ||
| 235 | program, num_color_buffers); | ||
| 236 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 237 | } | ||
| 238 | last_graphics_pipeline = entry.get(); | ||
| 239 | return last_graphics_pipeline; | ||
| 240 | } | ||
| 241 | 89 | ||
| 242 | VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { | 90 | ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { |
| 243 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 91 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 244 | 92 | ||
| 245 | const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); | 93 | const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); |
| @@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 248 | return *entry; | 96 | return *entry; |
| 249 | } | 97 | } |
| 250 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 98 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |
| 251 | 99 | throw "Bad"; | |
| 252 | const GPUVAddr gpu_addr = key.shader; | ||
| 253 | |||
| 254 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 255 | ASSERT(cpu_addr); | ||
| 256 | |||
| 257 | Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); | ||
| 258 | if (!shader) { | ||
| 259 | // No shader found - create a new one | ||
| 260 | const auto host_ptr = gpu_memory.GetPointer(gpu_addr); | ||
| 261 | |||
| 262 | ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); | ||
| 263 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||
| 264 | |||
| 265 | auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, | ||
| 266 | *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); | ||
| 267 | shader = shader_info.get(); | ||
| 268 | |||
| 269 | if (cpu_addr) { | ||
| 270 | Register(std::move(shader_info), *cpu_addr, size_in_bytes); | ||
| 271 | } else { | ||
| 272 | null_kernel = std::move(shader_info); | ||
| 273 | } | ||
| 274 | } | ||
| 275 | |||
| 276 | const Specialization specialization{ | ||
| 277 | .base_binding = 0, | ||
| 278 | .workgroup_size = key.workgroup_size, | ||
| 279 | .shared_memory_size = key.shared_memory_size, | ||
| 280 | .point_size = std::nullopt, | ||
| 281 | .enabled_attributes = {}, | ||
| 282 | .attribute_types = {}, | ||
| 283 | .ndc_minus_one_to_one = false, | ||
| 284 | }; | ||
| 285 | const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, | ||
| 286 | shader->GetRegistry(), specialization), | ||
| 287 | shader->GetEntries()}; | ||
| 288 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | ||
| 289 | update_descriptor_queue, spirv_shader); | ||
| 290 | return *entry; | ||
| 291 | } | ||
| 292 | |||
| 293 | void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { | ||
| 294 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 295 | std::unique_lock lock{pipeline_cache}; | ||
| 296 | graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); | ||
| 297 | } | ||
| 298 | |||
| 299 | void VKPipelineCache::OnShaderRemoval(Shader* shader) { | ||
| 300 | bool finished = false; | ||
| 301 | const auto Finish = [&] { | ||
| 302 | // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and | ||
| 303 | // flush. | ||
| 304 | if (finished) { | ||
| 305 | return; | ||
| 306 | } | ||
| 307 | finished = true; | ||
| 308 | scheduler.Finish(); | ||
| 309 | }; | ||
| 310 | |||
| 311 | const GPUVAddr invalidated_addr = shader->GetGpuAddr(); | ||
| 312 | for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { | ||
| 313 | auto& entry = it->first; | ||
| 314 | if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == | ||
| 315 | entry.shaders.end()) { | ||
| 316 | ++it; | ||
| 317 | continue; | ||
| 318 | } | ||
| 319 | Finish(); | ||
| 320 | it = graphics_cache.erase(it); | ||
| 321 | } | ||
| 322 | for (auto it = compute_cache.begin(); it != compute_cache.end();) { | ||
| 323 | auto& entry = it->first; | ||
| 324 | if (entry.shader != invalidated_addr) { | ||
| 325 | ++it; | ||
| 326 | continue; | ||
| 327 | } | ||
| 328 | Finish(); | ||
| 329 | it = compute_cache.erase(it); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | |||
| 333 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> | ||
| 334 | VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { | ||
| 335 | Specialization specialization; | ||
| 336 | if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { | ||
| 337 | float point_size; | ||
| 338 | std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); | ||
| 339 | specialization.point_size = point_size; | ||
| 340 | ASSERT(point_size != 0.0f); | ||
| 341 | } | ||
| 342 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { | ||
| 343 | const auto& attribute = fixed_state.attributes[i]; | ||
| 344 | specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; | ||
| 345 | specialization.attribute_types[i] = attribute.Type(); | ||
| 346 | } | ||
| 347 | specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; | ||
| 348 | specialization.early_fragment_tests = fixed_state.early_z; | ||
| 349 | |||
| 350 | // Alpha test | ||
| 351 | specialization.alpha_test_func = | ||
| 352 | FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); | ||
| 353 | specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref); | ||
| 354 | |||
| 355 | SPIRVProgram program; | ||
| 356 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 357 | |||
| 358 | for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 359 | const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); | ||
| 360 | // Skip stages that are not enabled | ||
| 361 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 362 | continue; | ||
| 363 | } | ||
| 364 | const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); | ||
| 365 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 366 | Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); | ||
| 367 | |||
| 368 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | ||
| 369 | const ShaderType program_type = GetShaderType(program_enum); | ||
| 370 | const auto& entries = shader->GetEntries(); | ||
| 371 | program[stage] = { | ||
| 372 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), | ||
| 373 | entries, | ||
| 374 | }; | ||
| 375 | |||
| 376 | const u32 old_binding = specialization.base_binding; | ||
| 377 | specialization.base_binding = | ||
| 378 | FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); | ||
| 379 | ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); | ||
| 380 | } | ||
| 381 | return {std::move(program), std::move(bindings)}; | ||
| 382 | } | 100 | } |
| 383 | 101 | ||
| 384 | template <VkDescriptorType descriptor_type, class Container> | 102 | void PipelineCache::OnShaderRemoval(Shader*) {} |
| 385 | void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, | ||
| 386 | u32& offset, const Container& container) { | ||
| 387 | static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); | ||
| 388 | const u32 count = static_cast<u32>(std::size(container)); | ||
| 389 | |||
| 390 | if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { | ||
| 391 | for (u32 i = 0; i < count; ++i) { | ||
| 392 | const u32 num_samplers = container[i].size; | ||
| 393 | template_entries.push_back({ | ||
| 394 | .dstBinding = binding, | ||
| 395 | .dstArrayElement = 0, | ||
| 396 | .descriptorCount = num_samplers, | ||
| 397 | .descriptorType = descriptor_type, | ||
| 398 | .offset = offset, | ||
| 399 | .stride = entry_size, | ||
| 400 | }); | ||
| 401 | |||
| 402 | ++binding; | ||
| 403 | offset += num_samplers * entry_size; | ||
| 404 | } | ||
| 405 | return; | ||
| 406 | } | ||
| 407 | |||
| 408 | if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || | ||
| 409 | descriptor_type == STORAGE_TEXEL_BUFFER) { | ||
| 410 | // Nvidia has a bug where updating multiple texels at once causes the driver to crash. | ||
| 411 | // Note: Fixed in driver Windows 443.24, Linux 440.66.15 | ||
| 412 | for (u32 i = 0; i < count; ++i) { | ||
| 413 | template_entries.push_back({ | ||
| 414 | .dstBinding = binding + i, | ||
| 415 | .dstArrayElement = 0, | ||
| 416 | .descriptorCount = 1, | ||
| 417 | .descriptorType = descriptor_type, | ||
| 418 | .offset = static_cast<std::size_t>(offset + i * entry_size), | ||
| 419 | .stride = entry_size, | ||
| 420 | }); | ||
| 421 | } | ||
| 422 | } else if (count > 0) { | ||
| 423 | template_entries.push_back({ | ||
| 424 | .dstBinding = binding, | ||
| 425 | .dstArrayElement = 0, | ||
| 426 | .descriptorCount = count, | ||
| 427 | .descriptorType = descriptor_type, | ||
| 428 | .offset = offset, | ||
| 429 | .stride = entry_size, | ||
| 430 | }); | ||
| 431 | } | ||
| 432 | offset += count * entry_size; | ||
| 433 | binding += count; | ||
| 434 | } | ||
| 435 | |||
| 436 | void FillDescriptorUpdateTemplateEntries( | ||
| 437 | const ShaderEntries& entries, u32& binding, u32& offset, | ||
| 438 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { | ||
| 439 | AddEntry<UNIFORM_BUFFER>(template_entries, binding, offset, entries.const_buffers); | ||
| 440 | AddEntry<STORAGE_BUFFER>(template_entries, binding, offset, entries.global_buffers); | ||
| 441 | AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, binding, offset, entries.uniform_texels); | ||
| 442 | AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, binding, offset, entries.samplers); | ||
| 443 | AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, binding, offset, entries.storage_texels); | ||
| 444 | AddEntry<STORAGE_IMAGE>(template_entries, binding, offset, entries.images); | ||
| 445 | } | ||
| 446 | 103 | ||
| 447 | } // namespace Vulkan | 104 | } // namespace Vulkan |
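
The AddEntry/FillDescriptorUpdateTemplateEntries helpers deleted above build the VkDescriptorUpdateTemplateEntry list that maps a flat CPU array of descriptor payloads onto set bindings: contiguous descriptors of one type share a single entry, arrayed samplers advance the offset by their element count, and texel buffers get one entry each to sidestep the Nvidia driver crash noted in the comment. A minimal sketch of that layout logic, assuming a hypothetical PackedEntry union and MakeEntries helper (VkDescriptorUpdateTemplateEntry and its fields are the real Vulkan API):

```cpp
// Sketch only: PackedEntry and MakeEntries are hypothetical stand-ins.
#include <cstdint>
#include <vector>

#include <vulkan/vulkan.h>

// Stand-in for the renderer's DescriptorUpdateEntry payload union.
union PackedEntry {
    VkDescriptorBufferInfo buffer;
    VkDescriptorImageInfo image;
    VkBufferView texel_buffer;
};

std::vector<VkDescriptorUpdateTemplateEntry> MakeEntries(uint32_t num_uniform_buffers,
                                                         uint32_t num_texel_buffers) {
    constexpr uint32_t stride = sizeof(PackedEntry);
    std::vector<VkDescriptorUpdateTemplateEntry> entries;
    uint32_t binding = 0;
    uint32_t offset = 0;
    // Contiguous descriptors of one type can share a single template entry;
    // the driver walks 'descriptorCount' payloads, 'stride' bytes apart.
    if (num_uniform_buffers > 0) {
        entries.push_back({
            .dstBinding = binding,
            .dstArrayElement = 0,
            .descriptorCount = num_uniform_buffers,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .offset = offset,
            .stride = stride,
        });
    }
    binding += num_uniform_buffers;
    offset += num_uniform_buffers * stride;
    // One entry per texel buffer instead of a batched entry, mirroring the
    // workaround for the Nvidia driver bug noted in the deleted code.
    for (uint32_t i = 0; i < num_texel_buffers; ++i) {
        entries.push_back({
            .dstBinding = binding + i,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
            .offset = offset + i * stride,
            .stride = stride,
        });
    }
    return entries;
}
```
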
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..e3e63340d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -15,15 +15,8 @@ | |||
| 15 | #include <boost/functional/hash.hpp> | 15 | #include <boost/functional/hash.hpp> |
| 16 | 16 | ||
| 17 | #include "common/common_types.h" | 17 | #include "common/common_types.h" |
| 18 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | 18 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 23 | #include "video_core/shader/async_shaders.h" | ||
| 24 | #include "video_core/shader/memory_util.h" | ||
| 25 | #include "video_core/shader/registry.h" | ||
| 26 | #include "video_core/shader/shader_ir.h" | ||
| 27 | #include "video_core/shader_cache.h" | 20 | #include "video_core/shader_cache.h" |
| 28 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 21 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 29 | 22 | ||
| @@ -35,7 +28,7 @@ namespace Vulkan { | |||
| 35 | 28 | ||
| 36 | class Device; | 29 | class Device; |
| 37 | class RasterizerVulkan; | 30 | class RasterizerVulkan; |
| 38 | class VKComputePipeline; | 31 | class ComputePipeline; |
| 39 | class VKDescriptorPool; | 32 | class VKDescriptorPool; |
| 40 | class VKScheduler; | 33 | class VKScheduler; |
| 41 | class VKUpdateDescriptorQueue; | 34 | class VKUpdateDescriptorQueue; |
| @@ -47,7 +40,7 @@ struct ComputePipelineCacheKey { | |||
| 47 | u32 shared_memory_size; | 40 | u32 shared_memory_size; |
| 48 | std::array<u32, 3> workgroup_size; | 41 | std::array<u32, 3> workgroup_size; |
| 49 | 42 | ||
| 50 | std::size_t Hash() const noexcept; | 43 | size_t Hash() const noexcept; |
| 51 | 44 | ||
| 52 | bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; | 45 | bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; |
| 53 | 46 | ||
| @@ -64,15 +57,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); | |||
| 64 | namespace std { | 57 | namespace std { |
| 65 | 58 | ||
| 66 | template <> | 59 | template <> |
| 67 | struct hash<Vulkan::GraphicsPipelineCacheKey> { | ||
| 68 | std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { | ||
| 69 | return k.Hash(); | ||
| 70 | } | ||
| 71 | }; | ||
| 72 | |||
| 73 | template <> | ||
| 74 | struct hash<Vulkan::ComputePipelineCacheKey> { | 60 | struct hash<Vulkan::ComputePipelineCacheKey> { |
| 75 | std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { | 61 | size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { |
| 76 | return k.Hash(); | 62 | return k.Hash(); |
| 77 | } | 63 | } |
| 78 | }; | 64 | }; |
| @@ -83,66 +69,26 @@ namespace Vulkan { | |||
| 83 | 69 | ||
| 84 | class Shader { | 70 | class Shader { |
| 85 | public: | 71 | public: |
| 86 | explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, | 72 | explicit Shader(); |
| 87 | Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, | ||
| 88 | VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); | ||
| 89 | ~Shader(); | 73 | ~Shader(); |
| 90 | |||
| 91 | GPUVAddr GetGpuAddr() const { | ||
| 92 | return gpu_addr; | ||
| 93 | } | ||
| 94 | |||
| 95 | VideoCommon::Shader::ShaderIR& GetIR() { | ||
| 96 | return shader_ir; | ||
| 97 | } | ||
| 98 | |||
| 99 | const VideoCommon::Shader::ShaderIR& GetIR() const { | ||
| 100 | return shader_ir; | ||
| 101 | } | ||
| 102 | |||
| 103 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 104 | return registry; | ||
| 105 | } | ||
| 106 | |||
| 107 | const ShaderEntries& GetEntries() const { | ||
| 108 | return entries; | ||
| 109 | } | ||
| 110 | |||
| 111 | private: | ||
| 112 | GPUVAddr gpu_addr{}; | ||
| 113 | VideoCommon::Shader::ProgramCode program_code; | ||
| 114 | VideoCommon::Shader::Registry registry; | ||
| 115 | VideoCommon::Shader::ShaderIR shader_ir; | ||
| 116 | ShaderEntries entries; | ||
| 117 | }; | 74 | }; |
| 118 | 75 | ||
| 119 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { | 76 | class PipelineCache final : public VideoCommon::ShaderCache<Shader> { |
| 120 | public: | 77 | public: |
| 121 | explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, | 78 | explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, |
| 122 | Tegra::Engines::Maxwell3D& maxwell3d, | 79 | Tegra::Engines::Maxwell3D& maxwell3d, |
| 123 | Tegra::Engines::KeplerCompute& kepler_compute, | 80 | Tegra::Engines::KeplerCompute& kepler_compute, |
| 124 | Tegra::MemoryManager& gpu_memory, const Device& device, | 81 | Tegra::MemoryManager& gpu_memory, const Device& device, |
| 125 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, | 82 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, |
| 126 | VKUpdateDescriptorQueue& update_descriptor_queue); | 83 | VKUpdateDescriptorQueue& update_descriptor_queue); |
| 127 | ~VKPipelineCache() override; | 84 | ~PipelineCache() override; |
| 128 | |||
| 129 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); | ||
| 130 | 85 | ||
| 131 | VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, | 86 | ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); |
| 132 | u32 num_color_buffers, | ||
| 133 | VideoCommon::Shader::AsyncShaders& async_shaders); | ||
| 134 | |||
| 135 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | ||
| 136 | |||
| 137 | void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); | ||
| 138 | 87 | ||
| 139 | protected: | 88 | protected: |
| 140 | void OnShaderRemoval(Shader* shader) final; | 89 | void OnShaderRemoval(Shader* shader) final; |
| 141 | 90 | ||
| 142 | private: | 91 | private: |
| 143 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( | ||
| 144 | const FixedPipelineState& fixed_state); | ||
| 145 | |||
| 146 | Tegra::GPU& gpu; | 92 | Tegra::GPU& gpu; |
| 147 | Tegra::Engines::Maxwell3D& maxwell3d; | 93 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 148 | Tegra::Engines::KeplerCompute& kepler_compute; | 94 | Tegra::Engines::KeplerCompute& kepler_compute; |
| @@ -158,17 +104,8 @@ private: | |||
| 158 | 104 | ||
| 159 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; | 105 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; |
| 160 | 106 | ||
| 161 | GraphicsPipelineCacheKey last_graphics_key; | ||
| 162 | VKGraphicsPipeline* last_graphics_pipeline = nullptr; | ||
| 163 | |||
| 164 | std::mutex pipeline_cache; | 107 | std::mutex pipeline_cache; |
| 165 | std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> | 108 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; |
| 166 | graphics_cache; | ||
| 167 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; | ||
| 168 | }; | 109 | }; |
| 169 | 110 | ||
| 170 | void FillDescriptorUpdateTemplateEntries( | ||
| 171 | const ShaderEntries& entries, u32& binding, u32& offset, | ||
| 172 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); | ||
| 173 | |||
| 174 | } // namespace Vulkan | 111 | } // namespace Vulkan |
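
The header keeps the byte-wise key pattern: a trivially copyable, padding-free key hashed over its raw bytes, compared with memcmp, and exposed to std::unordered_map through a std::hash specialization, with try_emplace giving a single-lookup cache miss path. A self-contained sketch under those assumptions (FNV-1a stands in for Common::CityHash64; Key, Pipeline, and GetPipeline are hypothetical):

```cpp
// Sketch only: Key, Pipeline, and GetPipeline are illustrative stand-ins.
#include <cstddef>
#include <cstring>
#include <memory>
#include <type_traits>
#include <unordered_map>

struct Key {
    unsigned long long shader_addr; // 8 bytes
    unsigned shared_memory_size;    // 4 bytes
    unsigned workgroup_x;           // 4 bytes -> 16 bytes total, no padding
    std::size_t Hash() const noexcept {
        // Byte-wise hashing is only well defined because the key is
        // trivially copyable and has no padding bytes.
        const auto* bytes = reinterpret_cast<const unsigned char*>(this);
        unsigned long long hash = 14695981039346656037ull; // FNV-1a 64-bit
        for (std::size_t i = 0; i < sizeof *this; ++i) {
            hash = (hash ^ bytes[i]) * 1099511628211ull;
        }
        return static_cast<std::size_t>(hash);
    }
    bool operator==(const Key& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
static_assert(std::is_trivially_copyable_v<Key>);

// The same std::hash specialization shape the header uses, so the key can
// drive a std::unordered_map directly.
template <>
struct std::hash<Key> {
    std::size_t operator()(const Key& k) const noexcept {
        return k.Hash();
    }
};

struct Pipeline {};

Pipeline& GetPipeline(std::unordered_map<Key, std::unique_ptr<Pipeline>>& cache, const Key& key) {
    // try_emplace performs one lookup: on a miss it default-constructs the
    // unique_ptr slot, which is then filled with a freshly built pipeline.
    const auto [it, is_cache_miss] = cache.try_emplace(key);
    auto& entry = it->second;
    if (is_cache_miss) {
        entry = std::make_unique<Pipeline>();
    }
    return *entry;
}
```
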
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..f152297d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 27 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 29 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 28 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 30 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| @@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { | |||
| 97 | return scissor; | 96 | return scissor; |
| 98 | } | 97 | } |
| 99 | 98 | ||
| 100 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | ||
| 101 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | ||
| 102 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | ||
| 103 | for (size_t i = 0; i < std::size(addresses); ++i) { | ||
| 104 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | ||
| 105 | } | ||
| 106 | return addresses; | ||
| 107 | } | ||
| 108 | |||
| 109 | struct TextureHandle { | 99 | struct TextureHandle { |
| 110 | constexpr TextureHandle(u32 data, bool via_header_index) { | 100 | constexpr TextureHandle(u32 data, bool via_header_index) { |
| 111 | const Tegra::Texture::TextureHandle handle{data}; | 101 | const Tegra::Texture::TextureHandle handle{data}; |
| @@ -117,98 +107,6 @@ struct TextureHandle { | |||
| 117 | u32 sampler; | 107 | u32 sampler; |
| 118 | }; | 108 | }; |
| 119 | 109 | ||
| 120 | template <typename Engine, typename Entry> | ||
| 121 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, | ||
| 122 | size_t stage, size_t index = 0) { | ||
| 123 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); | ||
| 124 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 125 | if (entry.is_separated) { | ||
| 126 | const u32 buffer_1 = entry.buffer; | ||
| 127 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 128 | const u32 offset_1 = entry.offset; | ||
| 129 | const u32 offset_2 = entry.secondary_offset; | ||
| 130 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 131 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 132 | return TextureHandle(handle_1 | handle_2, via_header_index); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | if (entry.is_bindless) { | ||
| 136 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | ||
| 137 | return TextureHandle(raw, via_header_index); | ||
| 138 | } | ||
| 139 | const u32 buffer = engine.GetBoundBuffer(); | ||
| 140 | const u64 offset = (entry.offset + index) * sizeof(u32); | ||
| 141 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||
| 142 | } | ||
| 143 | |||
| 144 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 145 | if (entry.is_buffer) { | ||
| 146 | return ImageViewType::e2D; | ||
| 147 | } | ||
| 148 | switch (entry.type) { | ||
| 149 | case Tegra::Shader::TextureType::Texture1D: | ||
| 150 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 151 | case Tegra::Shader::TextureType::Texture2D: | ||
| 152 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 153 | case Tegra::Shader::TextureType::Texture3D: | ||
| 154 | return ImageViewType::e3D; | ||
| 155 | case Tegra::Shader::TextureType::TextureCube: | ||
| 156 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 157 | } | ||
| 158 | UNREACHABLE(); | ||
| 159 | return ImageViewType::e2D; | ||
| 160 | } | ||
| 161 | |||
| 162 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 163 | switch (entry.type) { | ||
| 164 | case Tegra::Shader::ImageType::Texture1D: | ||
| 165 | return ImageViewType::e1D; | ||
| 166 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 167 | return ImageViewType::e1DArray; | ||
| 168 | case Tegra::Shader::ImageType::Texture2D: | ||
| 169 | return ImageViewType::e2D; | ||
| 170 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 171 | return ImageViewType::e2DArray; | ||
| 172 | case Tegra::Shader::ImageType::Texture3D: | ||
| 173 | return ImageViewType::e3D; | ||
| 174 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 175 | return ImageViewType::Buffer; | ||
| 176 | } | ||
| 177 | UNREACHABLE(); | ||
| 178 | return ImageViewType::e2D; | ||
| 179 | } | ||
| 180 | |||
| 181 | void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, | ||
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 183 | ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { | ||
| 184 | for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { | ||
| 185 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 186 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 187 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 188 | } | ||
| 189 | for (const auto& entry : entries.samplers) { | ||
| 190 | for (size_t i = 0; i < entry.size; ++i) { | ||
| 191 | const VkSampler sampler = *sampler_ptr++; | ||
| 192 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 193 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 194 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 195 | update_descriptor_queue.AddSampledImage(handle, sampler); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | for ([[maybe_unused]] const auto& entry : entries.storage_texels) { | ||
| 199 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 200 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 201 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 202 | } | ||
| 203 | for (const auto& entry : entries.images) { | ||
| 204 | // TODO: Mark as modified | ||
| 205 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 206 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 207 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 208 | update_descriptor_queue.AddImage(handle); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | |||
| 212 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, | 110 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, |
| 213 | bool is_indexed) { | 111 | bool is_indexed) { |
| 214 | DrawParams params{ | 112 | DrawParams params{ |
| @@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 253 | descriptor_pool, update_descriptor_queue), | 151 | descriptor_pool, update_descriptor_queue), |
| 254 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, | 152 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, |
| 255 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 153 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 256 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 154 | wfi_event(device.GetLogical().CreateEvent()) { |
| 257 | scheduler.SetQueryCache(query_cache); | 155 | scheduler.SetQueryCache(query_cache); |
| 258 | if (device.UseAsynchronousShaders()) { | ||
| 259 | async_shaders.AllocateWorkers(); | ||
| 260 | } | ||
| 261 | } | 156 | } |
| 262 | 157 | ||
| 263 | RasterizerVulkan::~RasterizerVulkan() = default; | 158 | RasterizerVulkan::~RasterizerVulkan() = default; |
| 264 | 159 | ||
| 265 | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | 160 | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { |
| 266 | MICROPROFILE_SCOPE(Vulkan_Drawing); | 161 | UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); |
| 267 | |||
| 268 | SCOPE_EXIT({ gpu.TickWork(); }); | ||
| 269 | FlushWork(); | ||
| 270 | |||
| 271 | query_cache.UpdateCounters(); | ||
| 272 | |||
| 273 | graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); | ||
| 274 | |||
| 275 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 276 | |||
| 277 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 278 | texture_cache.UpdateRenderTargets(false); | ||
| 279 | |||
| 280 | const auto shaders = pipeline_cache.GetShaders(); | ||
| 281 | graphics_key.shaders = GetShaderAddresses(shaders); | ||
| 282 | |||
| 283 | SetupShaderDescriptors(shaders, is_indexed); | ||
| 284 | |||
| 285 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | ||
| 286 | graphics_key.renderpass = framebuffer->RenderPass(); | ||
| 287 | |||
| 288 | VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( | ||
| 289 | graphics_key, framebuffer->NumColorBuffers(), async_shaders); | ||
| 290 | if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { | ||
| 291 | // Async graphics pipeline was not ready. | ||
| 292 | return; | ||
| 293 | } | ||
| 294 | |||
| 295 | BeginTransformFeedback(); | ||
| 296 | |||
| 297 | scheduler.RequestRenderpass(framebuffer); | ||
| 298 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||
| 299 | UpdateDynamicStates(); | ||
| 300 | |||
| 301 | const auto& regs = maxwell3d.regs; | ||
| 302 | const u32 num_instances = maxwell3d.mme_draw.instance_count; | ||
| 303 | const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); | ||
| 304 | const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); | ||
| 305 | const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); | ||
| 306 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | ||
| 307 | if (descriptor_set) { | ||
| 308 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, | ||
| 309 | DESCRIPTOR_SET, descriptor_set, nullptr); | ||
| 310 | } | ||
| 311 | if (draw_params.is_indexed) { | ||
| 312 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, | ||
| 313 | draw_params.base_vertex, draw_params.base_instance); | ||
| 314 | } else { | ||
| 315 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, | ||
| 316 | draw_params.base_vertex, draw_params.base_instance); | ||
| 317 | } | ||
| 318 | }); | ||
| 319 | |||
| 320 | EndTransformFeedback(); | ||
| 321 | } | 162 | } |
| 322 | 163 | ||
| 323 | void RasterizerVulkan::Clear() { | 164 | void RasterizerVulkan::Clear() { |
| @@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() { | |||
| 395 | }); | 236 | }); |
| 396 | } | 237 | } |
| 397 | 238 | ||
| 398 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | 239 | void RasterizerVulkan::DispatchCompute() { |
| 399 | MICROPROFILE_SCOPE(Vulkan_Compute); | 240 | UNREACHABLE_MSG("Not implemented"); |
| 400 | |||
| 401 | query_cache.UpdateCounters(); | ||
| 402 | |||
| 403 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 404 | auto& pipeline = pipeline_cache.GetComputePipeline({ | ||
| 405 | .shader = code_addr, | ||
| 406 | .shared_memory_size = launch_desc.shared_alloc, | ||
| 407 | .workgroup_size{ | ||
| 408 | launch_desc.block_dim_x, | ||
| 409 | launch_desc.block_dim_y, | ||
| 410 | launch_desc.block_dim_z, | ||
| 411 | }, | ||
| 412 | }); | ||
| 413 | |||
| 414 | // Compute dispatches can't be executed inside a renderpass | ||
| 415 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 416 | |||
| 417 | image_view_indices.clear(); | ||
| 418 | sampler_handles.clear(); | ||
| 419 | |||
| 420 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 421 | |||
| 422 | const auto& entries = pipeline.GetEntries(); | ||
| 423 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||
| 424 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 425 | u32 ssbo_index = 0; | ||
| 426 | for (const auto& buffer : entries.global_buffers) { | ||
| 427 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 428 | buffer.is_written); | ||
| 429 | ++ssbo_index; | ||
| 430 | } | ||
| 431 | buffer_cache.UpdateComputeBuffers(); | ||
| 432 | |||
| 433 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 434 | |||
| 435 | SetupComputeUniformTexels(entries); | ||
| 436 | SetupComputeTextures(entries); | ||
| 437 | SetupComputeStorageTexels(entries); | ||
| 438 | SetupComputeImages(entries); | ||
| 439 | |||
| 440 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 441 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 442 | |||
| 443 | update_descriptor_queue.Acquire(); | ||
| 444 | |||
| 445 | buffer_cache.BindHostComputeBuffers(); | ||
| 446 | |||
| 447 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 448 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 449 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||
| 450 | sampler_ptr); | ||
| 451 | |||
| 452 | const VkPipeline pipeline_handle = pipeline.GetHandle(); | ||
| 453 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | ||
| 454 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | ||
| 455 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | ||
| 456 | grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, | ||
| 457 | descriptor_set](vk::CommandBuffer cmdbuf) { | ||
| 458 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); | ||
| 459 | if (descriptor_set) { | ||
| 460 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, | ||
| 461 | DESCRIPTOR_SET, descriptor_set, nullptr); | ||
| 462 | } | ||
| 463 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); | ||
| 464 | }); | ||
| 465 | } | 241 | } |
| 466 | 242 | ||
| 467 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | 243 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { |
| @@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 | |||
| 716 | return buffer_cache.DMACopy(src_address, dest_address, amount); | 492 | return buffer_cache.DMACopy(src_address, dest_address, amount); |
| 717 | } | 493 | } |
| 718 | 494 | ||
| 719 | void RasterizerVulkan::SetupShaderDescriptors( | ||
| 720 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { | ||
| 721 | image_view_indices.clear(); | ||
| 722 | sampler_handles.clear(); | ||
| 723 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 724 | Shader* const shader = shaders[stage + 1]; | ||
| 725 | if (!shader) { | ||
| 726 | continue; | ||
| 727 | } | ||
| 728 | const ShaderEntries& entries = shader->GetEntries(); | ||
| 729 | SetupGraphicsUniformTexels(entries, stage); | ||
| 730 | SetupGraphicsTextures(entries, stage); | ||
| 731 | SetupGraphicsStorageTexels(entries, stage); | ||
| 732 | SetupGraphicsImages(entries, stage); | ||
| 733 | |||
| 734 | buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); | ||
| 735 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 736 | u32 ssbo_index = 0; | ||
| 737 | for (const auto& buffer : entries.global_buffers) { | ||
| 738 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 739 | buffer.cbuf_offset, buffer.is_written); | ||
| 740 | ++ssbo_index; | ||
| 741 | } | ||
| 742 | } | ||
| 743 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 744 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 745 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 746 | |||
| 747 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 748 | |||
| 749 | update_descriptor_queue.Acquire(); | ||
| 750 | |||
| 751 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 752 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 753 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 754 | // Skip VertexA stage | ||
| 755 | Shader* const shader = shaders[stage + 1]; | ||
| 756 | if (!shader) { | ||
| 757 | continue; | ||
| 758 | } | ||
| 759 | buffer_cache.BindHostStageBuffers(stage); | ||
| 760 | PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, | ||
| 761 | image_view_id_ptr, sampler_ptr); | ||
| 762 | } | ||
| 763 | } | ||
| 764 | |||
| 765 | void RasterizerVulkan::UpdateDynamicStates() { | 495 | void RasterizerVulkan::UpdateDynamicStates() { |
| 766 | auto& regs = maxwell3d.regs; | 496 | auto& regs = maxwell3d.regs; |
| 767 | UpdateViewportsState(regs); | 497 | UpdateViewportsState(regs); |
| @@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 810 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 540 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 811 | } | 541 | } |
| 812 | 542 | ||
| 813 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { | ||
| 814 | const auto& regs = maxwell3d.regs; | ||
| 815 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 816 | for (const auto& entry : entries.uniform_texels) { | ||
| 817 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 818 | image_view_indices.push_back(handle.image); | ||
| 819 | } | ||
| 820 | } | ||
| 821 | |||
| 822 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { | ||
| 823 | const auto& regs = maxwell3d.regs; | ||
| 824 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 825 | for (const auto& entry : entries.samplers) { | ||
| 826 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 827 | const TextureHandle handle = | ||
| 828 | GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); | ||
| 829 | image_view_indices.push_back(handle.image); | ||
| 830 | |||
| 831 | Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 832 | sampler_handles.push_back(sampler->Handle()); | ||
| 833 | } | ||
| 834 | } | ||
| 835 | } | ||
| 836 | |||
| 837 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { | ||
| 838 | const auto& regs = maxwell3d.regs; | ||
| 839 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 840 | for (const auto& entry : entries.storage_texels) { | ||
| 841 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 842 | image_view_indices.push_back(handle.image); | ||
| 843 | } | ||
| 844 | } | ||
| 845 | |||
| 846 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { | ||
| 847 | const auto& regs = maxwell3d.regs; | ||
| 848 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 849 | for (const auto& entry : entries.images) { | ||
| 850 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 851 | image_view_indices.push_back(handle.image); | ||
| 852 | } | ||
| 853 | } | ||
| 854 | |||
| 855 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | ||
| 856 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 857 | for (const auto& entry : entries.uniform_texels) { | ||
| 858 | const TextureHandle handle = | ||
| 859 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 860 | image_view_indices.push_back(handle.image); | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
| 864 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | ||
| 865 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 866 | for (const auto& entry : entries.samplers) { | ||
| 867 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 868 | const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, | ||
| 869 | COMPUTE_SHADER_INDEX, index); | ||
| 870 | image_view_indices.push_back(handle.image); | ||
| 871 | |||
| 872 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 873 | sampler_handles.push_back(sampler->Handle()); | ||
| 874 | } | ||
| 875 | } | ||
| 876 | } | ||
| 877 | |||
| 878 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | ||
| 879 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 880 | for (const auto& entry : entries.storage_texels) { | ||
| 881 | const TextureHandle handle = | ||
| 882 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 883 | image_view_indices.push_back(handle.image); | ||
| 884 | } | ||
| 885 | } | ||
| 886 | |||
| 887 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | ||
| 888 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 889 | for (const auto& entry : entries.images) { | ||
| 890 | const TextureHandle handle = | ||
| 891 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 892 | image_view_indices.push_back(handle.image); | ||
| 893 | } | ||
| 894 | } | ||
| 895 | |||
| 896 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 543 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 897 | if (!state_tracker.TouchViewports()) { | 544 | if (!state_tracker.TouchViewports()) { |
| 898 | return; | 545 | return; |
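
Both the surviving Clear() and the deleted Draw()/DispatchCompute() paths record work through scheduler.Record, capturing all draw state by value so the Vulkan command buffer can be filled in later on the scheduler's thread. A minimal sketch of that deferred-recording idea (Scheduler is a hypothetical stand-in for VKScheduler; only VkCommandBuffer and vkCmdDraw are real Vulkan API):

```cpp
// Sketch only: Scheduler approximates the VKScheduler recording queue.
#include <cstdint>
#include <functional>
#include <utility>
#include <vector>

#include <vulkan/vulkan.h>

class Scheduler {
public:
    // Defer a recording step; 'command' must capture its inputs by value.
    template <typename F>
    void Record(F&& command) {
        queue.emplace_back(std::forward<F>(command));
    }

    // Later, possibly on a worker thread, replay every step into the
    // command buffer in submission order.
    void Flush(VkCommandBuffer cmdbuf) {
        for (auto& command : queue) {
            command(cmdbuf);
        }
        queue.clear();
    }

private:
    std::vector<std::function<void(VkCommandBuffer)>> queue;
};

void Draw(Scheduler& scheduler, uint32_t num_vertices, uint32_t num_instances) {
    // Capture by value: the emulated GPU registers may change before the
    // lambda actually runs, so no references into mutable state are kept.
    scheduler.Record([num_vertices, num_instances](VkCommandBuffer cmdbuf) {
        vkCmdDraw(cmdbuf, num_vertices, num_instances, 0, 0);
    });
}
```
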
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..31017dc2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -28,7 +28,6 @@ | |||
| 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 31 | #include "video_core/shader/async_shaders.h" | ||
| 32 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 31 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 33 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 34 | 33 | ||
| @@ -73,7 +72,7 @@ public: | |||
| 73 | 72 | ||
| 74 | void Draw(bool is_indexed, bool is_instanced) override; | 73 | void Draw(bool is_indexed, bool is_instanced) override; |
| 75 | void Clear() override; | 74 | void Clear() override; |
| 76 | void DispatchCompute(GPUVAddr code_addr) override; | 75 | void DispatchCompute() override; |
| 77 | void ResetCounter(VideoCore::QueryType type) override; | 76 | void ResetCounter(VideoCore::QueryType type) override; |
| 78 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 77 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 79 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 78 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| @@ -103,19 +102,6 @@ public: | |||
| 103 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 102 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 104 | u32 pixel_stride) override; | 103 | u32 pixel_stride) override; |
| 105 | 104 | ||
| 106 | VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | ||
| 107 | return async_shaders; | ||
| 108 | } | ||
| 109 | |||
| 110 | const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||
| 111 | return async_shaders; | ||
| 112 | } | ||
| 113 | |||
| 114 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 115 | static constexpr size_t MaxConstbufferSize = 0x10000; | ||
| 116 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | ||
| 117 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 118 | |||
| 119 | private: | 105 | private: |
| 120 | static constexpr size_t MAX_TEXTURES = 192; | 106 | static constexpr size_t MAX_TEXTURES = 192; |
| 121 | static constexpr size_t MAX_IMAGES = 48; | 107 | static constexpr size_t MAX_IMAGES = 48; |
| @@ -125,40 +111,12 @@ private: | |||
| 125 | 111 | ||
| 126 | void FlushWork(); | 112 | void FlushWork(); |
| 127 | 113 | ||
| 128 | /// Setup descriptors in the graphics pipeline. | ||
| 129 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, | ||
| 130 | bool is_indexed); | ||
| 131 | |||
| 132 | void UpdateDynamicStates(); | 114 | void UpdateDynamicStates(); |
| 133 | 115 | ||
| 134 | void BeginTransformFeedback(); | 116 | void BeginTransformFeedback(); |
| 135 | 117 | ||
| 136 | void EndTransformFeedback(); | 118 | void EndTransformFeedback(); |
| 137 | 119 | ||
| 138 | /// Setup uniform texels in the graphics pipeline. | ||
| 139 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 140 | |||
| 141 | /// Setup textures in the graphics pipeline. | ||
| 142 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); | ||
| 143 | |||
| 144 | /// Setup storage texels in the graphics pipeline. | ||
| 145 | void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 146 | |||
| 147 | /// Setup images in the graphics pipeline. | ||
| 148 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | ||
| 149 | |||
| 150 | /// Setup texel buffers in the compute pipeline. | ||
| 151 | void SetupComputeUniformTexels(const ShaderEntries& entries); | ||
| 152 | |||
| 153 | /// Setup textures in the compute pipeline. | ||
| 154 | void SetupComputeTextures(const ShaderEntries& entries); | ||
| 155 | |||
| 156 | /// Setup storage texels in the compute pipeline. | ||
| 157 | void SetupComputeStorageTexels(const ShaderEntries& entries); | ||
| 158 | |||
| 159 | /// Setup images in the compute pipeline. | ||
| 160 | void SetupComputeImages(const ShaderEntries& entries); | ||
| 161 | |||
| 162 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 120 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 163 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 121 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 164 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | 122 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -198,13 +156,12 @@ private: | |||
| 198 | TextureCache texture_cache; | 156 | TextureCache texture_cache; |
| 199 | BufferCacheRuntime buffer_cache_runtime; | 157 | BufferCacheRuntime buffer_cache_runtime; |
| 200 | BufferCache buffer_cache; | 158 | BufferCache buffer_cache; |
| 201 | VKPipelineCache pipeline_cache; | 159 | PipelineCache pipeline_cache; |
| 202 | VKQueryCache query_cache; | 160 | VKQueryCache query_cache; |
| 203 | AccelerateDMA accelerate_dma; | 161 | AccelerateDMA accelerate_dma; |
| 204 | VKFenceManager fence_manager; | 162 | VKFenceManager fence_manager; |
| 205 | 163 | ||
| 206 | vk::Event wfi_event; | 164 | vk::Event wfi_event; |
| 207 | VideoCommon::Shader::AsyncShaders async_shaders; | ||
| 208 | 165 | ||
| 209 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | 166 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 210 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | 167 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
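
The image_view_indices member above stays a boost::container::static_vector: its capacity (MAX_IMAGE_VIEWS) is a compile-time bound, so per-draw pushes never touch the heap. A rough stand-in with the same contract (StaticVector and the bound of 32 are illustrative, not the renderer's types):

```cpp
// Sketch only: a minimal fixed-capacity vector mirroring the
// boost::container::static_vector usage above.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

template <typename T, std::size_t Capacity>
class StaticVector {
public:
    void push_back(const T& value) {
        assert(count < Capacity); // mirrors static_vector's capacity precondition
        storage[count++] = value;
    }
    void clear() noexcept { count = 0; }
    T* data() noexcept { return storage.data(); }
    std::size_t size() const noexcept { return count; }

private:
    std::array<T, Capacity> storage{}; // inline storage, never heap-allocated
    std::size_t count = 0;
};

// Usage mirroring the member above: cleared and refilled every draw without
// touching the allocator.
StaticVector<uint32_t, 32> image_view_indices;
```
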
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp deleted file mode 100644 index db11144c7..000000000 --- a/src/video_core/shader/ast.cpp +++ /dev/null | |||
| @@ -1,752 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | #include <string_view> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/shader/ast.h" | ||
| 13 | #include "video_core/shader/expr.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | ASTZipper::ASTZipper() = default; | ||
| 18 | |||
| 19 | void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { | ||
| 20 | ASSERT(new_first->manager == nullptr); | ||
| 21 | first = new_first; | ||
| 22 | last = new_first; | ||
| 23 | |||
| 24 | ASTNode current = first; | ||
| 25 | while (current) { | ||
| 26 | current->manager = this; | ||
| 27 | current->parent = parent; | ||
| 28 | last = current; | ||
| 29 | current = current->next; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | void ASTZipper::PushBack(const ASTNode new_node) { | ||
| 34 | ASSERT(new_node->manager == nullptr); | ||
| 35 | new_node->previous = last; | ||
| 36 | if (last) { | ||
| 37 | last->next = new_node; | ||
| 38 | } | ||
| 39 | new_node->next.reset(); | ||
| 40 | last = new_node; | ||
| 41 | if (!first) { | ||
| 42 | first = new_node; | ||
| 43 | } | ||
| 44 | new_node->manager = this; | ||
| 45 | } | ||
| 46 | |||
| 47 | void ASTZipper::PushFront(const ASTNode new_node) { | ||
| 48 | ASSERT(new_node->manager == nullptr); | ||
| 49 | new_node->previous.reset(); | ||
| 50 | new_node->next = first; | ||
| 51 | if (first) { | ||
| 52 | first->previous = new_node; | ||
| 53 | } | ||
| 54 | if (last == first) { | ||
| 55 | last = new_node; | ||
| 56 | } | ||
| 57 | first = new_node; | ||
| 58 | new_node->manager = this; | ||
| 59 | } | ||
| 60 | |||
| 61 | void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { | ||
| 62 | ASSERT(new_node->manager == nullptr); | ||
| 63 | if (!at_node) { | ||
| 64 | PushFront(new_node); | ||
| 65 | return; | ||
| 66 | } | ||
| 67 | const ASTNode next = at_node->next; | ||
| 68 | if (next) { | ||
| 69 | next->previous = new_node; | ||
| 70 | } | ||
| 71 | new_node->previous = at_node; | ||
| 72 | if (at_node == last) { | ||
| 73 | last = new_node; | ||
| 74 | } | ||
| 75 | new_node->next = next; | ||
| 76 | at_node->next = new_node; | ||
| 77 | new_node->manager = this; | ||
| 78 | } | ||
| 79 | |||
| 80 | void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { | ||
| 81 | ASSERT(new_node->manager == nullptr); | ||
| 82 | if (!at_node) { | ||
| 83 | PushBack(new_node); | ||
| 84 | return; | ||
| 85 | } | ||
| 86 | const ASTNode previous = at_node->previous; | ||
| 87 | if (previous) { | ||
| 88 | previous->next = new_node; | ||
| 89 | } | ||
| 90 | new_node->next = at_node; | ||
| 91 | if (at_node == first) { | ||
| 92 | first = new_node; | ||
| 93 | } | ||
| 94 | new_node->previous = previous; | ||
| 95 | at_node->previous = new_node; | ||
| 96 | new_node->manager = this; | ||
| 97 | } | ||
| 98 | |||
| 99 | void ASTZipper::DetachTail(ASTNode node) { | ||
| 100 | ASSERT(node->manager == this); | ||
| 101 | if (node == first) { | ||
| 102 | first.reset(); | ||
| 103 | last.reset(); | ||
| 104 | return; | ||
| 105 | } | ||
| 106 | |||
| 107 | last = node->previous; | ||
| 108 | last->next.reset(); | ||
| 109 | node->previous.reset(); | ||
| 110 | |||
| 111 | ASTNode current = std::move(node); | ||
| 112 | while (current) { | ||
| 113 | current->manager = nullptr; | ||
| 114 | current->parent.reset(); | ||
| 115 | current = current->next; | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { | ||
| 120 | ASSERT(start->manager == this && end->manager == this); | ||
| 121 | if (start == end) { | ||
| 122 | DetachSingle(start); | ||
| 123 | return; | ||
| 124 | } | ||
| 125 | const ASTNode prev = start->previous; | ||
| 126 | const ASTNode post = end->next; | ||
| 127 | if (!prev) { | ||
| 128 | first = post; | ||
| 129 | } else { | ||
| 130 | prev->next = post; | ||
| 131 | } | ||
| 132 | if (!post) { | ||
| 133 | last = prev; | ||
| 134 | } else { | ||
| 135 | post->previous = prev; | ||
| 136 | } | ||
| 137 | start->previous.reset(); | ||
| 138 | end->next.reset(); | ||
| 139 | ASTNode current = start; | ||
| 140 | bool found = false; | ||
| 141 | while (current) { | ||
| 142 | current->manager = nullptr; | ||
| 143 | current->parent.reset(); | ||
| 144 | found |= current == end; | ||
| 145 | current = current->next; | ||
| 146 | } | ||
| 147 | ASSERT(found); | ||
| 148 | } | ||
| 149 | |||
| 150 | void ASTZipper::DetachSingle(const ASTNode node) { | ||
| 151 | ASSERT(node->manager == this); | ||
| 152 | const ASTNode prev = node->previous; | ||
| 153 | const ASTNode post = node->next; | ||
| 154 | node->previous.reset(); | ||
| 155 | node->next.reset(); | ||
| 156 | if (!prev) { | ||
| 157 | first = post; | ||
| 158 | } else { | ||
| 159 | prev->next = post; | ||
| 160 | } | ||
| 161 | if (!post) { | ||
| 162 | last = prev; | ||
| 163 | } else { | ||
| 164 | post->previous = prev; | ||
| 165 | } | ||
| 166 | |||
| 167 | node->manager = nullptr; | ||
| 168 | node->parent.reset(); | ||
| 169 | } | ||
| 170 | |||
| 171 | void ASTZipper::Remove(const ASTNode node) { | ||
| 172 | ASSERT(node->manager == this); | ||
| 173 | const ASTNode next = node->next; | ||
| 174 | const ASTNode previous = node->previous; | ||
| 175 | if (previous) { | ||
| 176 | previous->next = next; | ||
| 177 | } | ||
| 178 | if (next) { | ||
| 179 | next->previous = previous; | ||
| 180 | } | ||
| 181 | node->parent.reset(); | ||
| 182 | node->manager = nullptr; | ||
| 183 | if (node == last) { | ||
| 184 | last = previous; | ||
| 185 | } | ||
| 186 | if (node == first) { | ||
| 187 | first = next; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | class ExprPrinter final { | ||
| 192 | public: | ||
| 193 | void operator()(const ExprAnd& expr) { | ||
| 194 | inner += "( "; | ||
| 195 | std::visit(*this, *expr.operand1); | ||
| 196 | inner += " && "; | ||
| 197 | std::visit(*this, *expr.operand2); | ||
| 198 | inner += ')'; | ||
| 199 | } | ||
| 200 | |||
| 201 | void operator()(const ExprOr& expr) { | ||
| 202 | inner += "( "; | ||
| 203 | std::visit(*this, *expr.operand1); | ||
| 204 | inner += " || "; | ||
| 205 | std::visit(*this, *expr.operand2); | ||
| 206 | inner += ')'; | ||
| 207 | } | ||
| 208 | |||
| 209 | void operator()(const ExprNot& expr) { | ||
| 210 | inner += "!"; | ||
| 211 | std::visit(*this, *expr.operand1); | ||
| 212 | } | ||
| 213 | |||
| 214 | void operator()(const ExprPredicate& expr) { | ||
| 215 | inner += fmt::format("P{}", expr.predicate); | ||
| 216 | } | ||
| 217 | |||
| 218 | void operator()(const ExprCondCode& expr) { | ||
| 219 | inner += fmt::format("CC{}", expr.cc); | ||
| 220 | } | ||
| 221 | |||
| 222 | void operator()(const ExprVar& expr) { | ||
| 223 | inner += fmt::format("V{}", expr.var_index); | ||
| 224 | } | ||
| 225 | |||
| 226 | void operator()(const ExprBoolean& expr) { | ||
| 227 | inner += expr.value ? "true" : "false"; | ||
| 228 | } | ||
| 229 | |||
| 230 | void operator()(const ExprGprEqual& expr) { | ||
| 231 | inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); | ||
| 232 | } | ||
| 233 | |||
| 234 | const std::string& GetResult() const { | ||
| 235 | return inner; | ||
| 236 | } | ||
| 237 | |||
| 238 | private: | ||
| 239 | std::string inner; | ||
| 240 | }; | ||
| 241 | |||
| 242 | class ASTPrinter { | ||
| 243 | public: | ||
| 244 | void operator()(const ASTProgram& ast) { | ||
| 245 | scope++; | ||
| 246 | inner += "program {\n"; | ||
| 247 | ASTNode current = ast.nodes.GetFirst(); | ||
| 248 | while (current) { | ||
| 249 | Visit(current); | ||
| 250 | current = current->GetNext(); | ||
| 251 | } | ||
| 252 | inner += "}\n"; | ||
| 253 | scope--; | ||
| 254 | } | ||
| 255 | |||
| 256 | void operator()(const ASTIfThen& ast) { | ||
| 257 | ExprPrinter expr_parser{}; | ||
| 258 | std::visit(expr_parser, *ast.condition); | ||
| 259 | inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); | ||
| 260 | scope++; | ||
| 261 | ASTNode current = ast.nodes.GetFirst(); | ||
| 262 | while (current) { | ||
| 263 | Visit(current); | ||
| 264 | current = current->GetNext(); | ||
| 265 | } | ||
| 266 | scope--; | ||
| 267 | inner += fmt::format("{}}}\n", Indent()); | ||
| 268 | } | ||
| 269 | |||
| 270 | void operator()(const ASTIfElse& ast) { | ||
| 271 | inner += Indent(); | ||
| 272 | inner += "else {\n"; | ||
| 273 | |||
| 274 | scope++; | ||
| 275 | ASTNode current = ast.nodes.GetFirst(); | ||
| 276 | while (current) { | ||
| 277 | Visit(current); | ||
| 278 | current = current->GetNext(); | ||
| 279 | } | ||
| 280 | scope--; | ||
| 281 | |||
| 282 | inner += Indent(); | ||
| 283 | inner += "}\n"; | ||
| 284 | } | ||
| 285 | |||
| 286 | void operator()(const ASTBlockEncoded& ast) { | ||
| 287 | inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); | ||
| 288 | } | ||
| 289 | |||
| 290 | void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { | ||
| 291 | inner += Indent(); | ||
| 292 | inner += "Block;\n"; | ||
| 293 | } | ||
| 294 | |||
| 295 | void operator()(const ASTVarSet& ast) { | ||
| 296 | ExprPrinter expr_parser{}; | ||
| 297 | std::visit(expr_parser, *ast.condition); | ||
| 298 | inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); | ||
| 299 | } | ||
| 300 | |||
| 301 | void operator()(const ASTLabel& ast) { | ||
| 302 | inner += fmt::format("Label_{}:\n", ast.index); | ||
| 303 | } | ||
| 304 | |||
| 305 | void operator()(const ASTGoto& ast) { | ||
| 306 | ExprPrinter expr_parser{}; | ||
| 307 | std::visit(expr_parser, *ast.condition); | ||
| 308 | inner += | ||
| 309 | fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); | ||
| 310 | } | ||
| 311 | |||
| 312 | void operator()(const ASTDoWhile& ast) { | ||
| 313 | ExprPrinter expr_parser{}; | ||
| 314 | std::visit(expr_parser, *ast.condition); | ||
| 315 | inner += fmt::format("{}do {{\n", Indent()); | ||
| 316 | scope++; | ||
| 317 | ASTNode current = ast.nodes.GetFirst(); | ||
| 318 | while (current) { | ||
| 319 | Visit(current); | ||
| 320 | current = current->GetNext(); | ||
| 321 | } | ||
| 322 | scope--; | ||
| 323 | inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); | ||
| 324 | } | ||
| 325 | |||
| 326 | void operator()(const ASTReturn& ast) { | ||
| 327 | ExprPrinter expr_parser{}; | ||
| 328 | std::visit(expr_parser, *ast.condition); | ||
| 329 | inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), | ||
| 330 | ast.kills ? "discard" : "exit"); | ||
| 331 | } | ||
| 332 | |||
| 333 | void operator()(const ASTBreak& ast) { | ||
| 334 | ExprPrinter expr_parser{}; | ||
| 335 | std::visit(expr_parser, *ast.condition); | ||
| 336 | inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); | ||
| 337 | } | ||
| 338 | |||
| 339 | void Visit(const ASTNode& node) { | ||
| 340 | std::visit(*this, *node->GetInnerData()); | ||
| 341 | } | ||
| 342 | |||
| 343 | const std::string& GetResult() const { | ||
| 344 | return inner; | ||
| 345 | } | ||
| 346 | |||
| 347 | private: | ||
| 348 | std::string_view Indent() { | ||
| 349 | if (space_segment_scope == scope) { | ||
| 350 | return space_segment; | ||
| 351 | } | ||
| 352 | |||
| 353 | // Ensure that we don't exceed our view. | ||
| 354 | ASSERT(scope * 2 < spaces.size()); | ||
| 355 | |||
| 356 | space_segment = spaces.substr(0, scope * 2); | ||
| 357 | space_segment_scope = scope; | ||
| 358 | return space_segment; | ||
| 359 | } | ||
| 360 | |||
| 361 | std::string inner{}; | ||
| 362 | std::string_view space_segment; | ||
| 363 | |||
| 364 | u32 scope{}; | ||
| 365 | u32 space_segment_scope{}; | ||
| 366 | |||
| 367 | static constexpr std::string_view spaces{" "}; | ||
| 368 | }; | ||
| 369 | |||
| 370 | std::string ASTManager::Print() const { | ||
| 371 | ASTPrinter printer{}; | ||
| 372 | printer.Visit(main_node); | ||
| 373 | return printer.GetResult(); | ||
| 374 | } | ||
| 375 | |||
| 376 | ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) | ||
| 377 | : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} | ||
| 378 | |||
| 379 | ASTManager::~ASTManager() { | ||
| 380 | Clear(); | ||
| 381 | } | ||
| 382 | |||
| 383 | void ASTManager::Init() { | ||
| 384 | main_node = ASTBase::Make<ASTProgram>(ASTNode{}); | ||
| 385 | program = std::get_if<ASTProgram>(main_node->GetInnerData()); | ||
| 386 | false_condition = MakeExpr<ExprBoolean>(false); | ||
| 387 | } | ||
| 388 | |||
| 389 | void ASTManager::DeclareLabel(u32 address) { | ||
| 390 | const auto pair = labels_map.emplace(address, labels_count); | ||
| 391 | if (pair.second) { | ||
| 392 | labels_count++; | ||
| 393 | labels.resize(labels_count); | ||
| 394 | } | ||
| 395 | } | ||
| 396 | |||
| 397 | void ASTManager::InsertLabel(u32 address) { | ||
| 398 | const u32 index = labels_map[address]; | ||
| 399 | const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index); | ||
| 400 | labels[index] = label; | ||
| 401 | program->nodes.PushBack(label); | ||
| 402 | } | ||
| 403 | |||
| 404 | void ASTManager::InsertGoto(Expr condition, u32 address) { | ||
| 405 | const u32 index = labels_map[address]; | ||
| 406 | const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index); | ||
| 407 | gotos.push_back(goto_node); | ||
| 408 | program->nodes.PushBack(goto_node); | ||
| 409 | } | ||
| 410 | |||
| 411 | void ASTManager::InsertBlock(u32 start_address, u32 end_address) { | ||
| 412 | ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address); | ||
| 413 | program->nodes.PushBack(std::move(block)); | ||
| 414 | } | ||
| 415 | |||
| 416 | void ASTManager::InsertReturn(Expr condition, bool kills) { | ||
| 417 | ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills); | ||
| 418 | program->nodes.PushBack(std::move(node)); | ||
| 419 | } | ||
| 420 | |||
| 421 | // The decompile algorithm is based on | ||
| 422 | // "Taming control flow: A structured approach to eliminating goto statements" | ||
| 423 | // by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be | ||
| 424 | // on the same structured level as the label which they jump to. This is done | ||
| 425 | // through outward/inward movements and lifting. Once they are at the same | ||
| 426 | // level, you can enclose them in an "if" structure or a "do-while" structure. | ||
| 427 | void ASTManager::Decompile() { | ||
| 428 | auto it = gotos.begin(); | ||
| 429 | while (it != gotos.end()) { | ||
| 430 | const ASTNode goto_node = *it; | ||
| 431 | const auto label_index = goto_node->GetGotoLabel(); | ||
| 432 | if (!label_index) { | ||
| 433 | return; | ||
| 434 | } | ||
| 435 | const ASTNode label = labels[*label_index]; | ||
| 436 | if (!full_decompile) { | ||
| 437 | // We only decompile backward jumps | ||
| 438 | if (!IsBackwardsJump(goto_node, label)) { | ||
| 439 | it++; | ||
| 440 | continue; | ||
| 441 | } | ||
| 442 | } | ||
| 443 | if (IndirectlyRelated(goto_node, label)) { | ||
| 444 | while (!DirectlyRelated(goto_node, label)) { | ||
| 445 | MoveOutward(goto_node); | ||
| 446 | } | ||
| 447 | } | ||
| 448 | if (DirectlyRelated(goto_node, label)) { | ||
| 449 | u32 goto_level = goto_node->GetLevel(); | ||
| 450 | const u32 label_level = label->GetLevel(); | ||
| 451 | while (label_level < goto_level) { | ||
| 452 | MoveOutward(goto_node); | ||
| 453 | goto_level--; | ||
| 454 | } | ||
| 455 | // TODO(Blinkhawk): Implement Lifting and Inward Movements | ||
| 456 | } | ||
| 457 | if (label->GetParent() == goto_node->GetParent()) { | ||
| 458 | bool is_loop = false; | ||
| 459 | ASTNode current = goto_node->GetPrevious(); | ||
| 460 | while (current) { | ||
| 461 | if (current == label) { | ||
| 462 | is_loop = true; | ||
| 463 | break; | ||
| 464 | } | ||
| 465 | current = current->GetPrevious(); | ||
| 466 | } | ||
| 467 | |||
| 468 | if (is_loop) { | ||
| 469 | EncloseDoWhile(goto_node, label); | ||
| 470 | } else { | ||
| 471 | EncloseIfThen(goto_node, label); | ||
| 472 | } | ||
| 473 | it = gotos.erase(it); | ||
| 474 | continue; | ||
| 475 | } | ||
| 476 | it++; | ||
| 477 | } | ||
| 478 | if (full_decompile) { | ||
| 479 | for (const ASTNode& label : labels) { | ||
| 480 | auto& manager = label->GetManager(); | ||
| 481 | manager.Remove(label); | ||
| 482 | } | ||
| 483 | labels.clear(); | ||
| 484 | } else { | ||
| 485 | auto label_it = labels.begin(); | ||
| 486 | while (label_it != labels.end()) { | ||
| 487 | bool can_remove = true; | ||
| 488 | ASTNode label = *label_it; | ||
| 489 | for (const ASTNode& goto_node : gotos) { | ||
| 490 | const auto label_index = goto_node->GetGotoLabel(); | ||
| 491 | if (!label_index) { | ||
| 492 | return; | ||
| 493 | } | ||
| 494 | ASTNode& glabel = labels[*label_index]; | ||
| 495 | if (glabel == label) { | ||
| 496 | can_remove = false; | ||
| 497 | break; | ||
| 498 | } | ||
| 499 | } | ||
| 500 | if (can_remove) { | ||
| 501 | label->MarkLabelUnused(); | ||
| 502 | } | ||
| 503 | label_it++; | ||
| 503 | } | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 507 | bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { | ||
| 508 | u32 goto_level = goto_node->GetLevel(); | ||
| 509 | u32 label_level = label_node->GetLevel(); | ||
| 510 | while (goto_level > label_level) { | ||
| 511 | goto_level--; | ||
| 512 | goto_node = goto_node->GetParent(); | ||
| 513 | } | ||
| 514 | while (label_level > goto_level) { | ||
| 515 | label_level--; | ||
| 516 | label_node = label_node->GetParent(); | ||
| 517 | } | ||
| 518 | while (goto_node->GetParent() != label_node->GetParent()) { | ||
| 519 | goto_node = goto_node->GetParent(); | ||
| 520 | label_node = label_node->GetParent(); | ||
| 521 | } | ||
| 522 | ASTNode current = goto_node->GetPrevious(); | ||
| 523 | while (current) { | ||
| 524 | if (current == label_node) { | ||
| 525 | return true; | ||
| 526 | } | ||
| 527 | current = current->GetPrevious(); | ||
| 528 | } | ||
| 529 | return false; | ||
| 530 | } | ||
| 531 | |||
| 532 | bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { | ||
| 533 | return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); | ||
| 534 | } | ||
| 535 | |||
| 536 | bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { | ||
| 537 | if (first->GetParent() == second->GetParent()) { | ||
| 538 | return false; | ||
| 539 | } | ||
| 540 | const u32 first_level = first->GetLevel(); | ||
| 541 | const u32 second_level = second->GetLevel(); | ||
| 542 | u32 min_level; | ||
| 543 | u32 max_level; | ||
| 544 | ASTNode max; | ||
| 545 | ASTNode min; | ||
| 546 | if (first_level > second_level) { | ||
| 547 | min_level = second_level; | ||
| 548 | min = second; | ||
| 549 | max_level = first_level; | ||
| 550 | max = first; | ||
| 551 | } else { | ||
| 552 | min_level = first_level; | ||
| 553 | min = first; | ||
| 554 | max_level = second_level; | ||
| 555 | max = second; | ||
| 556 | } | ||
| 557 | |||
| 558 | while (max_level > min_level) { | ||
| 559 | max_level--; | ||
| 560 | max = max->GetParent(); | ||
| 561 | } | ||
| 562 | |||
| 563 | return min->GetParent() == max->GetParent(); | ||
| 564 | } | ||
| 565 | |||
| 566 | void ASTManager::ShowCurrentState(std::string_view state) const { | ||
| 567 | LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); | ||
| 568 | SanityCheck(); | ||
| 569 | } | ||
| 570 | |||
| 571 | void ASTManager::SanityCheck() const { | ||
| 572 | for (const auto& label : labels) { | ||
| 573 | if (!label->GetParent()) { | ||
| 574 | LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); | ||
| 575 | } | ||
| 576 | } | ||
| 577 | } | ||
| 578 | |||
| 579 | void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { | ||
| 580 | ASTZipper& zipper = goto_node->GetManager(); | ||
| 581 | const ASTNode loop_start = label->GetNext(); | ||
| 582 | if (loop_start == goto_node) { | ||
| 583 | zipper.Remove(goto_node); | ||
| 584 | return; | ||
| 585 | } | ||
| 586 | const ASTNode parent = label->GetParent(); | ||
| 587 | const Expr condition = goto_node->GetGotoCondition(); | ||
| 588 | zipper.DetachSegment(loop_start, goto_node); | ||
| 589 | const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition); | ||
| 590 | ASTZipper* sub_zipper = do_while_node->GetSubNodes(); | ||
| 591 | sub_zipper->Init(loop_start, do_while_node); | ||
| 592 | zipper.InsertAfter(do_while_node, label); | ||
| 593 | sub_zipper->Remove(goto_node); | ||
| 594 | } | ||
| 595 | |||
| 596 | void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { | ||
| 597 | ASTZipper& zipper = goto_node->GetManager(); | ||
| 598 | const ASTNode if_end = label->GetPrevious(); | ||
| 599 | if (if_end == goto_node) { | ||
| 600 | zipper.Remove(goto_node); | ||
| 601 | return; | ||
| 602 | } | ||
| 603 | const ASTNode prev = goto_node->GetPrevious(); | ||
| 604 | const Expr condition = goto_node->GetGotoCondition(); | ||
| 605 | bool do_else = false; | ||
| 606 | if (!disable_else_derivation && prev->IsIfThen()) { | ||
| 607 | const Expr if_condition = prev->GetIfCondition(); | ||
| 608 | do_else = ExprAreEqual(if_condition, condition); | ||
| 609 | } | ||
| 610 | const ASTNode parent = label->GetParent(); | ||
| 611 | zipper.DetachSegment(goto_node, if_end); | ||
| 612 | ASTNode if_node; | ||
| 613 | if (do_else) { | ||
| 614 | if_node = ASTBase::Make<ASTIfElse>(parent); | ||
| 615 | } else { | ||
| 616 | Expr neg_condition = MakeExprNot(condition); | ||
| 617 | if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition); | ||
| 618 | } | ||
| 619 | ASTZipper* sub_zipper = if_node->GetSubNodes(); | ||
| 620 | sub_zipper->Init(goto_node, if_node); | ||
| 621 | zipper.InsertAfter(if_node, prev); | ||
| 622 | sub_zipper->Remove(goto_node); | ||
| 623 | } | ||
| 624 | |||
| 625 | void ASTManager::MoveOutward(ASTNode goto_node) { | ||
| 626 | ASTZipper& zipper = goto_node->GetManager(); | ||
| 627 | const ASTNode parent = goto_node->GetParent(); | ||
| 628 | ASTZipper& zipper2 = parent->GetManager(); | ||
| 629 | const ASTNode grandpa = parent->GetParent(); | ||
| 630 | const bool is_loop = parent->IsLoop(); | ||
| 631 | const bool is_else = parent->IsIfElse(); | ||
| 632 | const bool is_if = parent->IsIfThen(); | ||
| 633 | |||
| 634 | const ASTNode prev = goto_node->GetPrevious(); | ||
| 635 | const ASTNode post = goto_node->GetNext(); | ||
| 636 | |||
| 637 | const Expr condition = goto_node->GetGotoCondition(); | ||
| 638 | zipper.DetachSingle(goto_node); | ||
| 639 | if (is_loop) { | ||
| 640 | const u32 var_index = NewVariable(); | ||
| 641 | const Expr var_condition = MakeExpr<ExprVar>(var_index); | ||
| 642 | const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); | ||
| 643 | const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); | ||
| 644 | zipper2.InsertBefore(var_node_init, parent); | ||
| 645 | zipper.InsertAfter(var_node, prev); | ||
| 646 | goto_node->SetGotoCondition(var_condition); | ||
| 647 | const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition); | ||
| 648 | zipper.InsertAfter(break_node, var_node); | ||
| 649 | } else if (is_if || is_else) { | ||
| 650 | const u32 var_index = NewVariable(); | ||
| 651 | const Expr var_condition = MakeExpr<ExprVar>(var_index); | ||
| 652 | const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); | ||
| 653 | const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); | ||
| 654 | if (is_if) { | ||
| 655 | zipper2.InsertBefore(var_node_init, parent); | ||
| 656 | } else { | ||
| 657 | zipper2.InsertBefore(var_node_init, parent->GetPrevious()); | ||
| 658 | } | ||
| 659 | zipper.InsertAfter(var_node, prev); | ||
| 660 | goto_node->SetGotoCondition(var_condition); | ||
| 661 | if (post) { | ||
| 662 | zipper.DetachTail(post); | ||
| 663 | const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition)); | ||
| 664 | ASTZipper* sub_zipper = if_node->GetSubNodes(); | ||
| 665 | sub_zipper->Init(post, if_node); | ||
| 666 | zipper.InsertAfter(if_node, var_node); | ||
| 667 | } | ||
| 668 | } else { | ||
| 669 | UNREACHABLE(); | ||
| 670 | } | ||
| 671 | const ASTNode next = parent->GetNext(); | ||
| 672 | if (is_if && next && next->IsIfElse()) { | ||
| 673 | zipper2.InsertAfter(goto_node, next); | ||
| 674 | goto_node->SetParent(grandpa); | ||
| 675 | return; | ||
| 676 | } | ||
| 677 | zipper2.InsertAfter(goto_node, parent); | ||
| 678 | goto_node->SetParent(grandpa); | ||
| 679 | } | ||
| 680 | |||
| 681 | class ASTClearer { | ||
| 682 | public: | ||
| 683 | ASTClearer() = default; | ||
| 684 | |||
| 685 | void operator()(const ASTProgram& ast) { | ||
| 686 | ASTNode current = ast.nodes.GetFirst(); | ||
| 687 | while (current) { | ||
| 688 | Visit(current); | ||
| 689 | current = current->GetNext(); | ||
| 690 | } | ||
| 691 | } | ||
| 692 | |||
| 693 | void operator()(const ASTIfThen& ast) { | ||
| 694 | ASTNode current = ast.nodes.GetFirst(); | ||
| 695 | while (current) { | ||
| 696 | Visit(current); | ||
| 697 | current = current->GetNext(); | ||
| 698 | } | ||
| 699 | } | ||
| 700 | |||
| 701 | void operator()(const ASTIfElse& ast) { | ||
| 702 | ASTNode current = ast.nodes.GetFirst(); | ||
| 703 | while (current) { | ||
| 704 | Visit(current); | ||
| 705 | current = current->GetNext(); | ||
| 706 | } | ||
| 707 | } | ||
| 708 | |||
| 709 | void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} | ||
| 710 | |||
| 711 | void operator()(ASTBlockDecoded& ast) { | ||
| 712 | ast.nodes.clear(); | ||
| 713 | } | ||
| 714 | |||
| 715 | void operator()([[maybe_unused]] const ASTVarSet& ast) {} | ||
| 716 | |||
| 717 | void operator()([[maybe_unused]] const ASTLabel& ast) {} | ||
| 718 | |||
| 719 | void operator()([[maybe_unused]] const ASTGoto& ast) {} | ||
| 720 | |||
| 721 | void operator()(const ASTDoWhile& ast) { | ||
| 722 | ASTNode current = ast.nodes.GetFirst(); | ||
| 723 | while (current) { | ||
| 724 | Visit(current); | ||
| 725 | current = current->GetNext(); | ||
| 726 | } | ||
| 727 | } | ||
| 728 | |||
| 729 | void operator()([[maybe_unused]] const ASTReturn& ast) {} | ||
| 730 | |||
| 731 | void operator()([[maybe_unused]] const ASTBreak& ast) {} | ||
| 732 | |||
| 733 | void Visit(const ASTNode& node) { | ||
| 734 | std::visit(*this, *node->GetInnerData()); | ||
| 735 | node->Clear(); | ||
| 736 | } | ||
| 737 | }; | ||
| 738 | |||
| 739 | void ASTManager::Clear() { | ||
| 740 | if (!main_node) { | ||
| 741 | return; | ||
| 742 | } | ||
| 743 | ASTClearer clearer{}; | ||
| 744 | clearer.Visit(main_node); | ||
| 745 | main_node.reset(); | ||
| 746 | program = nullptr; | ||
| 747 | labels_map.clear(); | ||
| 748 | labels.clear(); | ||
| 749 | gotos.clear(); | ||
| 750 | } | ||
| 751 | |||
| 752 | } // namespace VideoCommon::Shader | ||
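The goto-elimination pass deleted above can be illustrated with ASTPrinter's own output syntax. A hypothetical trace (assumed input, not taken from a real title): a single backward conditional jump

    program {
    Label_0:
      Block(0, 16);
      (P0) -> goto Label_0;
    }

is rewritten by EncloseDoWhile into the structured form

    program {
      do {
        Block(0, 16);
      } while (P0);
    }

while a forward jump over a region is wrapped by EncloseIfThen in an if whose condition is the negated goto condition (or an else block, when the preceding if has an equal condition and else derivation is enabled).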
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h deleted file mode 100644 index dc49b369e..000000000 --- a/src/video_core/shader/ast.h +++ /dev/null | |||
| @@ -1,398 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <functional> | ||
| 8 | #include <list> | ||
| 9 | #include <memory> | ||
| 10 | #include <optional> | ||
| 11 | #include <string> | ||
| 12 | #include <unordered_map> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "video_core/shader/expr.h" | ||
| 16 | #include "video_core/shader/node.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | class ASTBase; | ||
| 21 | class ASTBlockDecoded; | ||
| 22 | class ASTBlockEncoded; | ||
| 23 | class ASTBreak; | ||
| 24 | class ASTDoWhile; | ||
| 25 | class ASTGoto; | ||
| 26 | class ASTIfElse; | ||
| 27 | class ASTIfThen; | ||
| 28 | class ASTLabel; | ||
| 29 | class ASTProgram; | ||
| 30 | class ASTReturn; | ||
| 31 | class ASTVarSet; | ||
| 32 | |||
| 33 | using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded, | ||
| 34 | ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>; | ||
| 35 | |||
| 36 | using ASTNode = std::shared_ptr<ASTBase>; | ||
| 37 | |||
| 38 | enum class ASTZipperType : u32 { | ||
| 39 | Program, | ||
| 40 | IfThen, | ||
| 41 | IfElse, | ||
| 42 | Loop, | ||
| 43 | }; | ||
| 44 | |||
| 45 | class ASTZipper final { | ||
| 46 | public: | ||
| 47 | explicit ASTZipper(); | ||
| 48 | |||
| 49 | void Init(ASTNode first, ASTNode parent); | ||
| 50 | |||
| 51 | ASTNode GetFirst() const { | ||
| 52 | return first; | ||
| 53 | } | ||
| 54 | |||
| 55 | ASTNode GetLast() const { | ||
| 56 | return last; | ||
| 57 | } | ||
| 58 | |||
| 59 | void PushBack(ASTNode new_node); | ||
| 60 | void PushFront(ASTNode new_node); | ||
| 61 | void InsertAfter(ASTNode new_node, ASTNode at_node); | ||
| 62 | void InsertBefore(ASTNode new_node, ASTNode at_node); | ||
| 63 | void DetachTail(ASTNode node); | ||
| 64 | void DetachSingle(ASTNode node); | ||
| 65 | void DetachSegment(ASTNode start, ASTNode end); | ||
| 66 | void Remove(ASTNode node); | ||
| 67 | |||
| 68 | ASTNode first; | ||
| 69 | ASTNode last; | ||
| 70 | }; | ||
| 71 | |||
| 72 | class ASTProgram { | ||
| 73 | public: | ||
| 74 | ASTZipper nodes{}; | ||
| 75 | }; | ||
| 76 | |||
| 77 | class ASTIfThen { | ||
| 78 | public: | ||
| 79 | explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} | ||
| 80 | Expr condition; | ||
| 81 | ASTZipper nodes{}; | ||
| 82 | }; | ||
| 83 | |||
| 84 | class ASTIfElse { | ||
| 85 | public: | ||
| 86 | ASTZipper nodes{}; | ||
| 87 | }; | ||
| 88 | |||
| 89 | class ASTBlockEncoded { | ||
| 90 | public: | ||
| 91 | explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {} | ||
| 92 | u32 start; | ||
| 93 | u32 end; | ||
| 94 | }; | ||
| 95 | |||
| 96 | class ASTBlockDecoded { | ||
| 97 | public: | ||
| 98 | explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} | ||
| 99 | NodeBlock nodes; | ||
| 100 | }; | ||
| 101 | |||
| 102 | class ASTVarSet { | ||
| 103 | public: | ||
| 104 | explicit ASTVarSet(u32 index_, Expr condition_) | ||
| 105 | : index{index_}, condition{std::move(condition_)} {} | ||
| 106 | |||
| 107 | u32 index; | ||
| 108 | Expr condition; | ||
| 109 | }; | ||
| 110 | |||
| 111 | class ASTLabel { | ||
| 112 | public: | ||
| 113 | explicit ASTLabel(u32 index_) : index{index_} {} | ||
| 114 | u32 index; | ||
| 115 | bool unused{}; | ||
| 116 | }; | ||
| 117 | |||
| 118 | class ASTGoto { | ||
| 119 | public: | ||
| 120 | explicit ASTGoto(Expr condition_, u32 label_) | ||
| 121 | : condition{std::move(condition_)}, label{label_} {} | ||
| 122 | |||
| 123 | Expr condition; | ||
| 124 | u32 label; | ||
| 125 | }; | ||
| 126 | |||
| 127 | class ASTDoWhile { | ||
| 128 | public: | ||
| 129 | explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} | ||
| 130 | Expr condition; | ||
| 131 | ASTZipper nodes{}; | ||
| 132 | }; | ||
| 133 | |||
| 134 | class ASTReturn { | ||
| 135 | public: | ||
| 136 | explicit ASTReturn(Expr condition_, bool kills_) | ||
| 137 | : condition{std::move(condition_)}, kills{kills_} {} | ||
| 138 | |||
| 139 | Expr condition; | ||
| 140 | bool kills; | ||
| 141 | }; | ||
| 142 | |||
| 143 | class ASTBreak { | ||
| 144 | public: | ||
| 145 | explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} | ||
| 146 | Expr condition; | ||
| 147 | }; | ||
| 148 | |||
| 149 | class ASTBase { | ||
| 150 | public: | ||
| 151 | explicit ASTBase(ASTNode parent_, ASTData data_) | ||
| 152 | : data{std::move(data_)}, parent{std::move(parent_)} {} | ||
| 153 | |||
| 154 | template <class U, class... Args> | ||
| 155 | static ASTNode Make(ASTNode parent, Args&&... args) { | ||
| 156 | return std::make_shared<ASTBase>(std::move(parent), | ||
| 157 | ASTData(U(std::forward<Args>(args)...))); | ||
| 158 | } | ||
| 159 | |||
| 160 | void SetParent(ASTNode new_parent) { | ||
| 161 | parent = std::move(new_parent); | ||
| 162 | } | ||
| 163 | |||
| 164 | ASTNode& GetParent() { | ||
| 165 | return parent; | ||
| 166 | } | ||
| 167 | |||
| 168 | const ASTNode& GetParent() const { | ||
| 169 | return parent; | ||
| 170 | } | ||
| 171 | |||
| 172 | u32 GetLevel() const { | ||
| 173 | u32 level = 0; | ||
| 174 | auto next_parent = parent; | ||
| 175 | while (next_parent) { | ||
| 176 | next_parent = next_parent->GetParent(); | ||
| 177 | level++; | ||
| 178 | } | ||
| 179 | return level; | ||
| 180 | } | ||
| 181 | |||
| 182 | ASTData* GetInnerData() { | ||
| 183 | return &data; | ||
| 184 | } | ||
| 185 | |||
| 186 | const ASTData* GetInnerData() const { | ||
| 187 | return &data; | ||
| 188 | } | ||
| 189 | |||
| 190 | ASTNode GetNext() const { | ||
| 191 | return next; | ||
| 192 | } | ||
| 193 | |||
| 194 | ASTNode GetPrevious() const { | ||
| 195 | return previous; | ||
| 196 | } | ||
| 197 | |||
| 198 | ASTZipper& GetManager() { | ||
| 199 | return *manager; | ||
| 200 | } | ||
| 201 | |||
| 202 | const ASTZipper& GetManager() const { | ||
| 203 | return *manager; | ||
| 204 | } | ||
| 205 | |||
| 206 | std::optional<u32> GetGotoLabel() const { | ||
| 207 | if (const auto* inner = std::get_if<ASTGoto>(&data)) { | ||
| 208 | return {inner->label}; | ||
| 209 | } | ||
| 210 | return std::nullopt; | ||
| 211 | } | ||
| 212 | |||
| 213 | Expr GetGotoCondition() const { | ||
| 214 | if (const auto* inner = std::get_if<ASTGoto>(&data)) { | ||
| 215 | return inner->condition; | ||
| 216 | } | ||
| 217 | return nullptr; | ||
| 218 | } | ||
| 219 | |||
| 220 | void MarkLabelUnused() { | ||
| 221 | if (auto* inner = std::get_if<ASTLabel>(&data)) { | ||
| 222 | inner->unused = true; | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | bool IsLabelUnused() const { | ||
| 227 | if (const auto* inner = std::get_if<ASTLabel>(&data)) { | ||
| 228 | return inner->unused; | ||
| 229 | } | ||
| 230 | return true; | ||
| 231 | } | ||
| 232 | |||
| 233 | std::optional<u32> GetLabelIndex() const { | ||
| 234 | if (const auto* inner = std::get_if<ASTLabel>(&data)) { | ||
| 235 | return {inner->index}; | ||
| 236 | } | ||
| 237 | return std::nullopt; | ||
| 238 | } | ||
| 239 | |||
| 240 | Expr GetIfCondition() const { | ||
| 241 | if (const auto* inner = std::get_if<ASTIfThen>(&data)) { | ||
| 242 | return inner->condition; | ||
| 243 | } | ||
| 244 | return nullptr; | ||
| 245 | } | ||
| 246 | |||
| 247 | void SetGotoCondition(Expr new_condition) { | ||
| 248 | if (auto* inner = std::get_if<ASTGoto>(&data)) { | ||
| 249 | inner->condition = std::move(new_condition); | ||
| 250 | } | ||
| 251 | } | ||
| 252 | |||
| 253 | bool IsIfThen() const { | ||
| 254 | return std::holds_alternative<ASTIfThen>(data); | ||
| 255 | } | ||
| 256 | |||
| 257 | bool IsIfElse() const { | ||
| 258 | return std::holds_alternative<ASTIfElse>(data); | ||
| 259 | } | ||
| 260 | |||
| 261 | bool IsBlockEncoded() const { | ||
| 262 | return std::holds_alternative<ASTBlockEncoded>(data); | ||
| 263 | } | ||
| 264 | |||
| 265 | void TransformBlockEncoded(NodeBlock&& nodes) { | ||
| 266 | data = ASTBlockDecoded(std::move(nodes)); | ||
| 267 | } | ||
| 268 | |||
| 269 | bool IsLoop() const { | ||
| 270 | return std::holds_alternative<ASTDoWhile>(data); | ||
| 271 | } | ||
| 272 | |||
| 273 | ASTZipper* GetSubNodes() { | ||
| 274 | if (std::holds_alternative<ASTProgram>(data)) { | ||
| 275 | return &std::get_if<ASTProgram>(&data)->nodes; | ||
| 276 | } | ||
| 277 | if (std::holds_alternative<ASTIfThen>(data)) { | ||
| 278 | return &std::get_if<ASTIfThen>(&data)->nodes; | ||
| 279 | } | ||
| 280 | if (std::holds_alternative<ASTIfElse>(data)) { | ||
| 281 | return &std::get_if<ASTIfElse>(&data)->nodes; | ||
| 282 | } | ||
| 283 | if (std::holds_alternative<ASTDoWhile>(data)) { | ||
| 284 | return &std::get_if<ASTDoWhile>(&data)->nodes; | ||
| 285 | } | ||
| 286 | return nullptr; | ||
| 287 | } | ||
| 288 | |||
| 289 | void Clear() { | ||
| 290 | next.reset(); | ||
| 291 | previous.reset(); | ||
| 292 | parent.reset(); | ||
| 293 | manager = nullptr; | ||
| 294 | } | ||
| 295 | |||
| 296 | private: | ||
| 297 | friend class ASTZipper; | ||
| 298 | |||
| 299 | ASTData data; | ||
| 300 | ASTNode parent; | ||
| 301 | ASTNode next; | ||
| 302 | ASTNode previous; | ||
| 303 | ASTZipper* manager{}; | ||
| 304 | }; | ||
| 305 | |||
| 306 | class ASTManager final { | ||
| 307 | public: | ||
| 308 | explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); | ||
| 309 | ~ASTManager(); | ||
| 310 | |||
| 311 | ASTManager(const ASTManager& o) = delete; | ||
| 312 | ASTManager& operator=(const ASTManager& other) = delete; | ||
| 313 | |||
| 314 | ASTManager(ASTManager&& other) noexcept = default; | ||
| 315 | ASTManager& operator=(ASTManager&& other) noexcept = default; | ||
| 316 | |||
| 317 | void Init(); | ||
| 318 | |||
| 319 | void DeclareLabel(u32 address); | ||
| 320 | |||
| 321 | void InsertLabel(u32 address); | ||
| 322 | |||
| 323 | void InsertGoto(Expr condition, u32 address); | ||
| 324 | |||
| 325 | void InsertBlock(u32 start_address, u32 end_address); | ||
| 326 | |||
| 327 | void InsertReturn(Expr condition, bool kills); | ||
| 328 | |||
| 329 | std::string Print() const; | ||
| 330 | |||
| 331 | void Decompile(); | ||
| 332 | |||
| 333 | void ShowCurrentState(std::string_view state) const; | ||
| 334 | |||
| 335 | void SanityCheck() const; | ||
| 336 | |||
| 337 | void Clear(); | ||
| 338 | |||
| 339 | bool IsFullyDecompiled() const { | ||
| 340 | if (full_decompile) { | ||
| 341 | return gotos.empty(); | ||
| 342 | } | ||
| 343 | |||
| 344 | for (ASTNode goto_node : gotos) { | ||
| 345 | auto label_index = goto_node->GetGotoLabel(); | ||
| 346 | if (!label_index) { | ||
| 347 | return false; | ||
| 348 | } | ||
| 349 | ASTNode glabel = labels[*label_index]; | ||
| 350 | if (IsBackwardsJump(goto_node, glabel)) { | ||
| 351 | return false; | ||
| 352 | } | ||
| 353 | } | ||
| 354 | return true; | ||
| 355 | } | ||
| 356 | |||
| 357 | ASTNode GetProgram() const { | ||
| 358 | return main_node; | ||
| 359 | } | ||
| 360 | |||
| 361 | u32 GetVariables() const { | ||
| 362 | return variables; | ||
| 363 | } | ||
| 364 | |||
| 365 | const std::vector<ASTNode>& GetLabels() const { | ||
| 366 | return labels; | ||
| 367 | } | ||
| 368 | |||
| 369 | private: | ||
| 370 | bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; | ||
| 371 | |||
| 372 | bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; | ||
| 373 | |||
| 374 | bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; | ||
| 375 | |||
| 376 | void EncloseDoWhile(ASTNode goto_node, ASTNode label); | ||
| 377 | |||
| 378 | void EncloseIfThen(ASTNode goto_node, ASTNode label); | ||
| 379 | |||
| 380 | void MoveOutward(ASTNode goto_node); | ||
| 381 | |||
| 382 | u32 NewVariable() { | ||
| 383 | return variables++; | ||
| 384 | } | ||
| 385 | |||
| 386 | bool full_decompile{}; | ||
| 387 | bool disable_else_derivation{}; | ||
| 388 | std::unordered_map<u32, u32> labels_map{}; | ||
| 389 | u32 labels_count{}; | ||
| 390 | std::vector<ASTNode> labels{}; | ||
| 391 | std::list<ASTNode> gotos{}; | ||
| 392 | u32 variables{}; | ||
| 393 | ASTProgram* program{}; | ||
| 394 | ASTNode main_node{}; | ||
| 395 | Expr false_condition{}; | ||
| 396 | }; | ||
| 397 | |||
| 398 | } // namespace VideoCommon::Shader | ||
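The ASTData/std::visit machinery above follows the standard variant-visitor pattern. A minimal standalone C++ sketch (simplified toy types, not yuzu code) of the same dispatch that ASTPrinter performs:

    #include <iostream>
    #include <string>
    #include <variant>

    // Two toy node payloads standing in for the eleven ASTData alternatives.
    struct NodeLabel { int index; };
    struct NodeBreak { std::string condition; };
    using NodeData = std::variant<NodeLabel, NodeBreak>;

    // Like ASTPrinter: one operator() overload per alternative.
    struct Printer {
        void operator()(const NodeLabel& n) { std::cout << "Label_" << n.index << ":\n"; }
        void operator()(const NodeBreak& n) { std::cout << "(" << n.condition << ") -> break;\n"; }
    };

    int main() {
        const NodeData nodes[] = {NodeLabel{0}, NodeBreak{"P0"}};
        Printer printer;
        for (const NodeData& node : nodes) {
            std::visit(printer, node); // dispatches to the matching overload
        }
    }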
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp deleted file mode 100644 index 02adcf9c7..000000000 --- a/src/video_core/shader/async_shaders.cpp +++ /dev/null | |||
| @@ -1,234 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <condition_variable> | ||
| 6 | #include <mutex> | ||
| 7 | #include <thread> | ||
| 8 | #include <vector> | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/renderer_base.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||
| 12 | #include "video_core/shader/async_shaders.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} | ||
| 17 | |||
| 18 | AsyncShaders::~AsyncShaders() { | ||
| 19 | KillWorkers(); | ||
| 20 | } | ||
| 21 | |||
| 22 | void AsyncShaders::AllocateWorkers() { | ||
| 23 | // Use at least one thread | ||
| 24 | u32 num_workers = 1; | ||
| 25 | |||
| 26 | // Deduce how many more threads we can use | ||
| 27 | const u32 thread_count = std::thread::hardware_concurrency(); | ||
| 28 | if (thread_count >= 8) { | ||
| 29 | // Add one worker when we reach 8 threads, plus one for every 2 threads beyond 8 | ||
| 30 | num_workers += 1 + (thread_count - 8) / 2; | ||
| 31 | } | ||
| 32 | |||
| 33 | // If we already have the desired number of workers, there is nothing to do | ||
| 34 | if (num_workers == worker_threads.size()) { | ||
| 35 | return; | ||
| 36 | } | ||
| 37 | |||
| 38 | // If workers already exist, clear them | ||
| 39 | if (!worker_threads.empty()) { | ||
| 40 | FreeWorkers(); | ||
| 41 | } | ||
| 42 | |||
| 43 | // Create workers | ||
| 44 | for (std::size_t i = 0; i < num_workers; i++) { | ||
| 45 | context_list.push_back(emu_window.CreateSharedContext()); | ||
| 46 | worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, | ||
| 47 | context_list[i].get()); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | void AsyncShaders::FreeWorkers() { | ||
| 52 | // Mark all threads to quit | ||
| 53 | is_thread_exiting.store(true); | ||
| 54 | cv.notify_all(); | ||
| 55 | for (auto& thread : worker_threads) { | ||
| 56 | thread.join(); | ||
| 57 | } | ||
| 58 | // Clear our shared contexts | ||
| 59 | context_list.clear(); | ||
| 60 | |||
| 61 | // Clear our worker threads | ||
| 62 | worker_threads.clear(); | ||
| 63 | } | ||
| 64 | |||
| 65 | void AsyncShaders::KillWorkers() { | ||
| 66 | is_thread_exiting.store(true); | ||
| 67 | cv.notify_all(); | ||
| 68 | for (auto& thread : worker_threads) { | ||
| 69 | thread.detach(); | ||
| 70 | } | ||
| 71 | // Clear our shared contexts | ||
| 72 | context_list.clear(); | ||
| 73 | |||
| 74 | // Clear our worker threads | ||
| 75 | worker_threads.clear(); | ||
| 76 | } | ||
| 77 | |||
| 78 | bool AsyncShaders::HasWorkQueued() const { | ||
| 79 | return !pending_queue.empty(); | ||
| 80 | } | ||
| 81 | |||
| 82 | bool AsyncShaders::HasCompletedWork() const { | ||
| 83 | std::shared_lock lock{completed_mutex}; | ||
| 84 | return !finished_work.empty(); | ||
| 85 | } | ||
| 86 | |||
| 87 | bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { | ||
| 88 | const auto& regs = gpu.Maxwell3D().regs; | ||
| 89 | |||
| 90 | // If something is using depth, we can assume the game is not rendering anything that will | ||
| 91 | // only be used once. | ||
| 92 | if (regs.zeta_enable) { | ||
| 93 | return true; | ||
| 94 | } | ||
| 95 | |||
| 96 | // If games are using a small index count, we can assume these are full screen quads. Usually | ||
| 97 | // these shaders are only used once for building textures, so we assume they cannot be built | ||
| 98 | // async. | ||
| 99 | if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { | ||
| 100 | return false; | ||
| 101 | } | ||
| 102 | |||
| 103 | return true; | ||
| 104 | } | ||
| 105 | |||
| 106 | std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { | ||
| 107 | std::vector<Result> results; | ||
| 108 | { | ||
| 109 | std::unique_lock lock{completed_mutex}; | ||
| 110 | results = std::move(finished_work); | ||
| 111 | finished_work.clear(); | ||
| 112 | } | ||
| 113 | return results; | ||
| 114 | } | ||
| 115 | |||
| 116 | void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, | ||
| 117 | Tegra::Engines::ShaderType shader_type, u64 uid, | ||
| 118 | std::vector<u64> code, std::vector<u64> code_b, | ||
| 119 | u32 main_offset, CompilerSettings compiler_settings, | ||
| 120 | const Registry& registry, VAddr cpu_addr) { | ||
| 121 | std::unique_lock lock(queue_mutex); | ||
| 122 | pending_queue.push({ | ||
| 123 | .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, | ||
| 124 | .device = &device, | ||
| 125 | .shader_type = shader_type, | ||
| 126 | .uid = uid, | ||
| 127 | .code = std::move(code), | ||
| 128 | .code_b = std::move(code_b), | ||
| 129 | .main_offset = main_offset, | ||
| 130 | .compiler_settings = compiler_settings, | ||
| 131 | .registry = registry, | ||
| 132 | .cpu_address = cpu_addr, | ||
| 133 | .pp_cache = nullptr, | ||
| 134 | .vk_device = nullptr, | ||
| 135 | .scheduler = nullptr, | ||
| 136 | .descriptor_pool = nullptr, | ||
| 137 | .update_descriptor_queue = nullptr, | ||
| 138 | .bindings{}, | ||
| 139 | .program{}, | ||
| 140 | .key{}, | ||
| 141 | .num_color_buffers = 0, | ||
| 142 | }); | ||
| 143 | cv.notify_one(); | ||
| 144 | } | ||
| 145 | |||
| 146 | void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, | ||
| 147 | const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, | ||
| 148 | Vulkan::VKDescriptorPool& descriptor_pool, | ||
| 149 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 150 | std::vector<VkDescriptorSetLayoutBinding> bindings, | ||
| 151 | Vulkan::SPIRVProgram program, | ||
| 152 | Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { | ||
| 153 | std::unique_lock lock(queue_mutex); | ||
| 154 | pending_queue.push({ | ||
| 155 | .backend = Backend::Vulkan, | ||
| 156 | .device = nullptr, | ||
| 157 | .shader_type{}, | ||
| 158 | .uid = 0, | ||
| 159 | .code{}, | ||
| 160 | .code_b{}, | ||
| 161 | .main_offset = 0, | ||
| 162 | .compiler_settings{}, | ||
| 163 | .registry{}, | ||
| 164 | .cpu_address = 0, | ||
| 165 | .pp_cache = pp_cache, | ||
| 166 | .vk_device = &device, | ||
| 167 | .scheduler = &scheduler, | ||
| 168 | .descriptor_pool = &descriptor_pool, | ||
| 169 | .update_descriptor_queue = &update_descriptor_queue, | ||
| 170 | .bindings = std::move(bindings), | ||
| 171 | .program = std::move(program), | ||
| 172 | .key = key, | ||
| 173 | .num_color_buffers = num_color_buffers, | ||
| 174 | }); | ||
| 175 | cv.notify_one(); | ||
| 176 | } | ||
| 177 | |||
| 178 | void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { | ||
| 179 | while (!is_thread_exiting.load(std::memory_order_relaxed)) { | ||
| 180 | std::unique_lock lock{queue_mutex}; | ||
| 181 | cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); | ||
| 182 | if (is_thread_exiting) { | ||
| 183 | return; | ||
| 184 | } | ||
| 185 | |||
| 186 | // Defensively re-check for work in case another worker drained the queue | ||
| 187 | if (!HasWorkQueued()) { | ||
| 188 | continue; | ||
| 189 | } | ||
| 190 | // Another thread beat us, just unlock and wait for the next load | ||
| 191 | if (pending_queue.empty()) { | ||
| 192 | continue; | ||
| 193 | } | ||
| 194 | |||
| 195 | // Pull work from queue | ||
| 196 | WorkerParams work = std::move(pending_queue.front()); | ||
| 197 | pending_queue.pop(); | ||
| 198 | lock.unlock(); | ||
| 199 | |||
| 200 | if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { | ||
| 201 | const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); | ||
| 202 | const auto scope = context->Acquire(); | ||
| 203 | auto program = | ||
| 204 | OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); | ||
| 205 | Result result{}; | ||
| 206 | result.backend = work.backend; | ||
| 207 | result.cpu_address = work.cpu_address; | ||
| 208 | result.uid = work.uid; | ||
| 209 | result.code = std::move(work.code); | ||
| 210 | result.code_b = std::move(work.code_b); | ||
| 211 | result.shader_type = work.shader_type; | ||
| 212 | |||
| 213 | if (work.backend == Backend::OpenGL) { | ||
| 214 | result.program.opengl = std::move(program->source_program); | ||
| 215 | } else if (work.backend == Backend::GLASM) { | ||
| 216 | result.program.glasm = std::move(program->assembly_program); | ||
| 217 | } | ||
| 218 | |||
| 219 | { | ||
| 220 | std::unique_lock complete_lock(completed_mutex); | ||
| 221 | finished_work.push_back(std::move(result)); | ||
| 222 | } | ||
| 223 | } else if (work.backend == Backend::Vulkan) { | ||
| 224 | auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( | ||
| 225 | *work.vk_device, *work.scheduler, *work.descriptor_pool, | ||
| 226 | *work.update_descriptor_queue, work.key, work.bindings, work.program, | ||
| 227 | work.num_color_buffers); | ||
| 228 | |||
| 229 | work.pp_cache->EmplacePipeline(std::move(pipeline)); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | } // namespace VideoCommon::Shader | ||
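ShaderCompilerThread above is a conventional condition-variable worker loop. A standalone sketch of the same pattern (assumed names; the real class additionally manages shared GL contexts and two result backends):

    #include <condition_variable>
    #include <cstdio>
    #include <mutex>
    #include <queue>
    #include <thread>
    #include <vector>

    std::condition_variable cv;
    std::mutex queue_mutex;
    std::queue<int> pending_queue;
    bool is_thread_exiting = false;

    void WorkerThread() {
        while (true) {
            std::unique_lock lock{queue_mutex};
            cv.wait(lock, [] { return !pending_queue.empty() || is_thread_exiting; });
            if (is_thread_exiting) {
                return; // mirrors FreeWorkers/KillWorkers: pending work may be dropped
            }
            const int work = pending_queue.front();
            pending_queue.pop();
            lock.unlock(); // compile outside the lock so other workers can pull work
            std::printf("compiled shader %d\n", work);
        }
    }

    int main() {
        std::vector<std::thread> workers;
        for (int i = 0; i < 2; ++i) {
            workers.emplace_back(WorkerThread);
        }
        {
            std::scoped_lock lock{queue_mutex};
            pending_queue.push(1);
            pending_queue.push(2);
        }
        cv.notify_all();
        {
            std::scoped_lock lock{queue_mutex};
            is_thread_exiting = true; // set under the lock so waiters cannot miss it
        }
        cv.notify_all();
        for (std::thread& t : workers) {
            t.join();
        }
    }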
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h deleted file mode 100644 index 7fdff6e56..000000000 --- a/src/video_core/shader/async_shaders.h +++ /dev/null | |||
| @@ -1,138 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <condition_variable> | ||
| 8 | #include <memory> | ||
| 9 | #include <shared_mutex> | ||
| 10 | #include <thread> | ||
| 11 | |||
| 12 | #include <glad/glad.h> | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 17 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 20 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 21 | |||
| 22 | namespace Core::Frontend { | ||
| 23 | class EmuWindow; | ||
| 24 | class GraphicsContext; | ||
| 25 | } // namespace Core::Frontend | ||
| 26 | |||
| 27 | namespace Tegra { | ||
| 28 | class GPU; | ||
| 29 | } | ||
| 30 | |||
| 31 | namespace Vulkan { | ||
| 32 | class VKPipelineCache; | ||
| 33 | } | ||
| 34 | |||
| 35 | namespace VideoCommon::Shader { | ||
| 36 | |||
| 37 | class AsyncShaders { | ||
| 38 | public: | ||
| 39 | enum class Backend { | ||
| 40 | OpenGL, | ||
| 41 | GLASM, | ||
| 42 | Vulkan, | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct ResultPrograms { | ||
| 46 | OpenGL::OGLProgram opengl; | ||
| 47 | OpenGL::OGLAssemblyProgram glasm; | ||
| 48 | }; | ||
| 49 | |||
| 50 | struct Result { | ||
| 51 | u64 uid; | ||
| 52 | VAddr cpu_address; | ||
| 53 | Backend backend; | ||
| 54 | ResultPrograms program; | ||
| 55 | std::vector<u64> code; | ||
| 56 | std::vector<u64> code_b; | ||
| 57 | Tegra::Engines::ShaderType shader_type; | ||
| 58 | }; | ||
| 59 | |||
| 60 | explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); | ||
| 61 | ~AsyncShaders(); | ||
| 62 | |||
| 63 | /// Start up shader worker threads | ||
| 64 | void AllocateWorkers(); | ||
| 65 | |||
| 66 | /// Clear the shader queue and kill all worker threads | ||
| 67 | void FreeWorkers(); | ||
| 68 | |||
| 69 | /// Force end all threads | ||
| 70 | void KillWorkers(); | ||
| 71 | |||
| 72 | /// Check to see if any shaders have actually been compiled | ||
| 73 | [[nodiscard]] bool HasCompletedWork() const; | ||
| 74 | |||
| 75 | /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot build | ||
| 76 | /// every shader async as some shaders are only built and executed once. We try to "guess" which | ||
| 77 | /// shaders would be used only once. | ||
| 78 | [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; | ||
| 79 | |||
| 80 | /// Pulls completed compiled shaders | ||
| 81 | [[nodiscard]] std::vector<Result> GetCompletedWork(); | ||
| 82 | |||
| 83 | void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, | ||
| 84 | u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, | ||
| 85 | CompilerSettings compiler_settings, const Registry& registry, | ||
| 86 | VAddr cpu_addr); | ||
| 87 | |||
| 88 | void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, | ||
| 89 | Vulkan::VKScheduler& scheduler, | ||
| 90 | Vulkan::VKDescriptorPool& descriptor_pool, | ||
| 91 | Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 92 | std::vector<VkDescriptorSetLayoutBinding> bindings, | ||
| 93 | Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, | ||
| 94 | u32 num_color_buffers); | ||
| 95 | |||
| 96 | private: | ||
| 97 | void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); | ||
| 98 | |||
| 99 | /// Check our worker queue to see if we have any work queued already | ||
| 100 | [[nodiscard]] bool HasWorkQueued() const; | ||
| 101 | |||
| 102 | struct WorkerParams { | ||
| 103 | Backend backend; | ||
| 104 | // For OGL | ||
| 105 | const OpenGL::Device* device; | ||
| 106 | Tegra::Engines::ShaderType shader_type; | ||
| 107 | u64 uid; | ||
| 108 | std::vector<u64> code; | ||
| 109 | std::vector<u64> code_b; | ||
| 110 | u32 main_offset; | ||
| 111 | CompilerSettings compiler_settings; | ||
| 112 | std::optional<Registry> registry; | ||
| 113 | VAddr cpu_address; | ||
| 114 | |||
| 115 | // For Vulkan | ||
| 116 | Vulkan::VKPipelineCache* pp_cache; | ||
| 117 | const Vulkan::Device* vk_device; | ||
| 118 | Vulkan::VKScheduler* scheduler; | ||
| 119 | Vulkan::VKDescriptorPool* descriptor_pool; | ||
| 120 | Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; | ||
| 121 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 122 | Vulkan::SPIRVProgram program; | ||
| 123 | Vulkan::GraphicsPipelineCacheKey key; | ||
| 124 | u32 num_color_buffers; | ||
| 125 | }; | ||
| 126 | |||
| 127 | std::condition_variable cv; | ||
| 128 | mutable std::mutex queue_mutex; | ||
| 129 | mutable std::shared_mutex completed_mutex; | ||
| 130 | std::atomic<bool> is_thread_exiting{}; | ||
| 131 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; | ||
| 132 | std::vector<std::thread> worker_threads; | ||
| 133 | std::queue<WorkerParams> pending_queue; | ||
| 134 | std::vector<Result> finished_work; | ||
| 135 | Core::Frontend::EmuWindow& emu_window; | ||
| 136 | }; | ||
| 137 | |||
| 138 | } // namespace VideoCommon::Shader | ||
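For reference, the worker-count heuristic from AllocateWorkers() above, extracted as a standalone worked example (the free function here is illustrative, not part of the removed API):

    #include <cassert>

    // One worker minimum; reaching 8 hardware threads adds a second, and every
    // 2 threads beyond 8 add one more: num_workers = 2 + (threads - 8) / 2.
    unsigned NumWorkers(unsigned thread_count) {
        unsigned num_workers = 1;
        if (thread_count >= 8) {
            num_workers += 1 + (thread_count - 8) / 2;
        }
        return num_workers;
    }

    int main() {
        assert(NumWorkers(4) == 1);
        assert(NumWorkers(8) == 2);
        assert(NumWorkers(12) == 4);
        assert(NumWorkers(16) == 6);
    }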
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp deleted file mode 100644 index cddcbd4f0..000000000 --- a/src/video_core/shader/compiler_settings.cpp +++ /dev/null | |||
| @@ -1,26 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/shader/compiler_settings.h" | ||
| 6 | |||
| 7 | namespace VideoCommon::Shader { | ||
| 8 | |||
| 9 | std::string CompileDepthAsString(const CompileDepth cd) { | ||
| 10 | switch (cd) { | ||
| 11 | case CompileDepth::BruteForce: | ||
| 12 | return "Brute Force Compile"; | ||
| 13 | case CompileDepth::FlowStack: | ||
| 14 | return "Simple Flow Stack Mode"; | ||
| 15 | case CompileDepth::NoFlowStack: | ||
| 16 | return "Remove Flow Stack"; | ||
| 17 | case CompileDepth::DecompileBackwards: | ||
| 18 | return "Decompile Backward Jumps"; | ||
| 19 | case CompileDepth::FullDecompile: | ||
| 20 | return "Full Decompilation"; | ||
| 21 | default: | ||
| 22 | return "Unknown Compiler Process"; | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h deleted file mode 100644 index 916018c01..000000000 --- a/src/video_core/shader/compiler_settings.h +++ /dev/null | |||
| @@ -1,26 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | |||
| 9 | namespace VideoCommon::Shader { | ||
| 10 | |||
| 11 | enum class CompileDepth : u32 { | ||
| 12 | BruteForce = 0, | ||
| 13 | FlowStack = 1, | ||
| 14 | NoFlowStack = 2, | ||
| 15 | DecompileBackwards = 3, | ||
| 16 | FullDecompile = 4, | ||
| 17 | }; | ||
| 18 | |||
| 19 | std::string CompileDepthAsString(CompileDepth cd); | ||
| 20 | |||
| 21 | struct CompilerSettings { | ||
| 22 | CompileDepth depth{CompileDepth::NoFlowStack}; | ||
| 23 | bool disable_else_derivation{true}; | ||
| 24 | }; | ||
| 25 | |||
| 26 | } // namespace VideoCommon::Shader | ||
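A hedged sketch of how these settings plausibly fed the decompiler (only the enum and struct come from the header above; the wiring shown is an assumption for illustration): CompileDepth selects how aggressively control flow is structured, and disable_else_derivation is forwarded to ASTManager.

    #include <cstdio>

    enum class CompileDepth : unsigned {
        BruteForce = 0,
        FlowStack = 1,
        NoFlowStack = 2,
        DecompileBackwards = 3,
        FullDecompile = 4,
    };

    struct CompilerSettings {
        CompileDepth depth{CompileDepth::NoFlowStack};
        bool disable_else_derivation{true};
    };

    int main() {
        const CompilerSettings settings{CompileDepth::FullDecompile, false};
        // Hypothetical wiring: full goto elimination only at FullDecompile,
        // backward jumps handled from DecompileBackwards upward.
        const bool full_decompile = settings.depth == CompileDepth::FullDecompile;
        const bool decompile_backwards = settings.depth >= CompileDepth::DecompileBackwards;
        std::printf("full=%d backwards=%d disable_else=%d\n", full_decompile,
                    decompile_backwards, settings.disable_else_derivation);
    }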
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null | |||
| @@ -1,751 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <list> | ||
| 6 | #include <map> | ||
| 7 | #include <set> | ||
| 8 | #include <stack> | ||
| 9 | #include <unordered_map> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/shader/ast.h" | ||
| 15 | #include "video_core/shader/control_flow.h" | ||
| 16 | #include "video_core/shader/memory_util.h" | ||
| 17 | #include "video_core/shader/registry.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | namespace { | ||
| 23 | |||
| 24 | using Tegra::Shader::Instruction; | ||
| 25 | using Tegra::Shader::OpCode; | ||
| 26 | |||
| 27 | constexpr s32 unassigned_branch = -2; | ||
| 28 | |||
| 29 | struct Query { | ||
| 30 | u32 address{}; | ||
| 31 | std::stack<u32> ssy_stack{}; | ||
| 32 | std::stack<u32> pbk_stack{}; | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct BlockStack { | ||
| 36 | BlockStack() = default; | ||
| 37 | explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} | ||
| 38 | std::stack<u32> ssy_stack{}; | ||
| 39 | std::stack<u32> pbk_stack{}; | ||
| 40 | }; | ||
| 41 | |||
| 42 | template <typename T, typename... Args> | ||
| 43 | BlockBranchInfo MakeBranchInfo(Args&&... args) { | ||
| 44 | static_assert(std::is_convertible_v<T, BranchData>); | ||
| 45 | return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); | ||
| 46 | } | ||
| 47 | |||
| 48 | bool BlockBranchIsIgnored(BlockBranchInfo first) { | ||
| 49 | bool ignore = false; | ||
| 50 | if (std::holds_alternative<SingleBranch>(*first)) { | ||
| 51 | const auto branch = std::get_if<SingleBranch>(first.get()); | ||
| 52 | ignore = branch->ignore; | ||
| 53 | } | ||
| 54 | return ignore; | ||
| 55 | } | ||
| 56 | |||
| 57 | struct BlockInfo { | ||
| 58 | u32 start{}; | ||
| 59 | u32 end{}; | ||
| 60 | bool visited{}; | ||
| 61 | BlockBranchInfo branch{}; | ||
| 62 | |||
| 63 | bool IsInside(const u32 address) const { | ||
| 64 | return start <= address && address <= end; | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct CFGRebuildState { | ||
| 69 | explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) | ||
| 70 | : program_code{program_code_}, registry{registry_}, start{start_} {} | ||
| 71 | |||
| 72 | const ProgramCode& program_code; | ||
| 73 | Registry& registry; | ||
| 74 | u32 start{}; | ||
| 75 | std::vector<BlockInfo> block_info; | ||
| 76 | std::list<u32> inspect_queries; | ||
| 77 | std::list<Query> queries; | ||
| 78 | std::unordered_map<u32, u32> registered; | ||
| 79 | std::set<u32> labels; | ||
| 80 | std::map<u32, u32> ssy_labels; | ||
| 81 | std::map<u32, u32> pbk_labels; | ||
| 82 | std::unordered_map<u32, BlockStack> stacks; | ||
| 83 | ASTManager* manager{}; | ||
| 84 | }; | ||
| 85 | |||
| 86 | enum class BlockCollision : u32 { None, Found, Inside }; | ||
| 87 | |||
| 88 | std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { | ||
| 89 | const auto& blocks = state.block_info; | ||
| 90 | for (u32 index = 0; index < blocks.size(); index++) { | ||
| 91 | if (blocks[index].start == address) { | ||
| 92 | return {BlockCollision::Found, index}; | ||
| 93 | } | ||
| 94 | if (blocks[index].IsInside(address)) { | ||
| 95 | return {BlockCollision::Inside, index}; | ||
| 96 | } | ||
| 97 | } | ||
| 98 | return {BlockCollision::None, 0xFFFFFFFF}; | ||
| 99 | } | ||
| 100 | |||
| 101 | struct ParseInfo { | ||
| 102 | BlockBranchInfo branch_info{}; | ||
| 103 | u32 end_address{}; | ||
| 104 | }; | ||
| 105 | |||
| 106 | BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { | ||
| 107 | auto& it = state.block_info.emplace_back(); | ||
| 108 | it.start = start; | ||
| 109 | it.end = end; | ||
| 110 | const u32 index = static_cast<u32>(state.block_info.size() - 1); | ||
| 111 | state.registered.insert({start, index}); | ||
| 112 | return it; | ||
| 113 | } | ||
| 114 | |||
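| | // Predicate indices 0-7 are the positive forms; the negated form of predicate N | ||
| | // is encoded as N + 8 (matching the pred > 7 handling in InsertBranch below). | ||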
| 115 | Pred GetPredicate(u32 index, bool negated) { | ||
| 116 | return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); | ||
| 117 | } | ||
| 118 | |||
| 119 | enum class ParseResult : u32 { | ||
| 120 | ControlCaught, | ||
| 121 | BlockEnd, | ||
| 122 | AbnormalFlow, | ||
| 123 | }; | ||
| 124 | |||
| 125 | struct BranchIndirectInfo { | ||
| 126 | u32 buffer{}; | ||
| 127 | u32 offset{}; | ||
| 128 | u32 entries{}; | ||
| 129 | s32 relative_position{}; | ||
| 130 | }; | ||
| 131 | |||
| 132 | struct BufferInfo { | ||
| 133 | u32 index; | ||
| 134 | u32 offset; | ||
| 135 | }; | ||
| 136 | |||
| 137 | std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { | ||
| 138 | const Instruction instr = state.program_code[pos]; | ||
| 139 | const auto opcode = OpCode::Decode(instr); | ||
| 140 | if (opcode->get().GetId() != OpCode::Id::BRX) { | ||
| 141 | return std::nullopt; | ||
| 142 | } | ||
| 143 | if (instr.brx.constant_buffer != 0) { | ||
| 144 | return std::nullopt; | ||
| 145 | } | ||
| 146 | --pos; | ||
| 147 | return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); | ||
| 148 | } | ||
| 149 | |||
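| | // Generic backwards scan: starting at pos and walking towards state.start, skip | ||
| | // scheduler instructions and return pack(instr, opcode) for the first instruction | ||
| | // that satisfies test; pos is left pointing just before the match. | ||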
| 150 | template <typename Result, typename TestCallable, typename PackCallable> | ||
| 151 | // requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> | ||
| 152 | // requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> | ||
| 153 | std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, | ||
| 154 | PackCallable pack) { | ||
| 155 | for (; pos >= state.start; --pos) { | ||
| 156 | if (IsSchedInstruction(pos, state.start)) { | ||
| 157 | continue; | ||
| 158 | } | ||
| 159 | const Instruction instr = state.program_code[pos]; | ||
| 160 | const auto opcode = OpCode::Decode(instr); | ||
| 161 | if (!opcode) { | ||
| 162 | continue; | ||
| 163 | } | ||
| 164 | if (test(instr, opcode->get())) { | ||
| 165 | --pos; | ||
| 166 | return std::make_optional(pack(instr, opcode->get())); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | return std::nullopt; | ||
| 170 | } | ||
| 171 | |||
| 172 | std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, | ||
| 173 | u64 brx_tracked_register) { | ||
| 174 | return TrackInstruction<std::pair<BufferInfo, u64>>( | ||
| 175 | state, pos, | ||
| 176 | [brx_tracked_register](auto instr, const auto& opcode) { | ||
| 177 | return opcode.GetId() == OpCode::Id::LD_C && | ||
| 178 | instr.gpr0.Value() == brx_tracked_register && | ||
| 179 | instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; | ||
| 180 | }, | ||
| 181 | [](auto instr, const auto& opcode) { | ||
| 182 | const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), | ||
| 183 | static_cast<u32>(instr.cbuf36.GetOffset())}; | ||
| 184 | return std::make_pair(info, instr.gpr8.Value()); | ||
| 185 | }); | ||
| 186 | } | ||
| 187 | |||
| 188 | std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, | ||
| 189 | u64 ldc_tracked_register) { | ||
| 190 | return TrackInstruction<u64>( | ||
| 191 | state, pos, | ||
| 192 | [ldc_tracked_register](auto instr, const auto& opcode) { | ||
| 193 | return opcode.GetId() == OpCode::Id::SHL_IMM && | ||
| 194 | instr.gpr0.Value() == ldc_tracked_register; | ||
| 195 | }, | ||
| 196 | [](auto instr, const auto&) { return instr.gpr8.Value(); }); | ||
| 197 | } | ||
| 198 | |||
| 199 | std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, | ||
| 200 | u64 shl_tracked_register) { | ||
| 201 | return TrackInstruction<u32>( | ||
| 202 | state, pos, | ||
| 203 | [shl_tracked_register](auto instr, const auto& opcode) { | ||
| 204 | return opcode.GetId() == OpCode::Id::IMNMX_IMM && | ||
| 205 | instr.gpr0.Value() == shl_tracked_register; | ||
| 206 | }, | ||
| 207 | [](auto instr, const auto&) { | ||
| 208 | return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); | ||
| 209 | }); | ||
| 210 | } | ||
| 211 | |||
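| | // Recovers the BRX jump table by pattern-matching backwards from the branch: | ||
| | // BRX reads a register loaded by LD_C from a constant buffer, the LD_C address | ||
| | // register comes from SHL_IMM, and IMNMX_IMM clamps the index, yielding the | ||
| | // number of table entries. | ||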
| 212 | std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { | ||
| 213 | const auto brx_info = GetBRXInfo(state, pos); | ||
| 214 | if (!brx_info) { | ||
| 215 | return std::nullopt; | ||
| 216 | } | ||
| 217 | const auto [relative_position, brx_tracked_register] = *brx_info; | ||
| 218 | |||
| 219 | const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); | ||
| 220 | if (!ldc_info) { | ||
| 221 | return std::nullopt; | ||
| 222 | } | ||
| 223 | const auto [buffer_info, ldc_tracked_register] = *ldc_info; | ||
| 224 | |||
| 225 | const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); | ||
| 226 | if (!shl_tracked_register) { | ||
| 227 | return std::nullopt; | ||
| 228 | } | ||
| 229 | |||
| 230 | const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); | ||
| 231 | if (!entries) { | ||
| 232 | return std::nullopt; | ||
| 233 | } | ||
| 234 | |||
| 235 | return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; | ||
| 236 | } | ||
| 237 | |||
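| | // Scans instructions linearly from 'address' until a control-flow instruction | ||
| | // terminates the block, registering SSY/PBK targets as labels along the way. | ||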
| 238 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | ||
| 239 | u32 offset = static_cast<u32>(address); | ||
| 240 | const u32 end_address = static_cast<u32>(state.program_code.size()); | ||
| 241 | ParseInfo parse_info{}; | ||
| 242 | SingleBranch single_branch{}; | ||
| 243 | |||
| 244 | const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { | ||
| 245 | const auto pair = rebuild_state.labels.emplace(label_address); | ||
| 246 | if (pair.second) { | ||
| 247 | rebuild_state.inspect_queries.push_back(label_address); | ||
| 248 | } | ||
| 249 | }; | ||
| 250 | |||
| 251 | while (true) { | ||
| 252 | if (offset >= end_address) { | ||
| 253 | // ASSERT_OR_EXECUTE can't be used, as it ignores the break | ||
| 254 | ASSERT_MSG(false, "Shader exceeded the program size limit!"); | ||
| 255 | |||
| 256 | single_branch.address = exit_branch; | ||
| 257 | single_branch.ignore = false; | ||
| 258 | break; | ||
| 259 | } | ||
| 260 | if (state.registered.contains(offset)) { | ||
| 261 | single_branch.address = offset; | ||
| 262 | single_branch.ignore = true; | ||
| 263 | break; | ||
| 264 | } | ||
| 265 | if (IsSchedInstruction(offset, state.start)) { | ||
| 266 | offset++; | ||
| 267 | continue; | ||
| 268 | } | ||
| 269 | const Instruction instr = {state.program_code[offset]}; | ||
| 270 | const auto opcode = OpCode::Decode(instr); | ||
| 271 | if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { | ||
| 272 | offset++; | ||
| 273 | continue; | ||
| 274 | } | ||
| 275 | |||
| 276 | switch (opcode->get().GetId()) { | ||
| 277 | case OpCode::Id::EXIT: { | ||
| 278 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 279 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 280 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 281 | offset++; | ||
| 282 | continue; | ||
| 283 | } | ||
| 284 | const ConditionCode cc = instr.flow_condition_code; | ||
| 285 | single_branch.condition.cc = cc; | ||
| 286 | if (cc == ConditionCode::F) { | ||
| 287 | offset++; | ||
| 288 | continue; | ||
| 289 | } | ||
| 290 | single_branch.address = exit_branch; | ||
| 291 | single_branch.kill = false; | ||
| 292 | single_branch.is_sync = false; | ||
| 293 | single_branch.is_brk = false; | ||
| 294 | single_branch.ignore = false; | ||
| 295 | parse_info.end_address = offset; | ||
| 296 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 297 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 298 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 299 | |||
| 300 | return {ParseResult::ControlCaught, parse_info}; | ||
| 301 | } | ||
| 302 | case OpCode::Id::BRA: { | ||
| 303 | if (instr.bra.constant_buffer != 0) { | ||
| 304 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 305 | } | ||
| 306 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 307 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 308 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 309 | offset++; | ||
| 310 | continue; | ||
| 311 | } | ||
| 312 | const ConditionCode cc = instr.flow_condition_code; | ||
| 313 | single_branch.condition.cc = cc; | ||
| 314 | if (cc == ConditionCode::F) { | ||
| 315 | offset++; | ||
| 316 | continue; | ||
| 317 | } | ||
| 318 | const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||
| 319 | if (branch_offset == 0) { | ||
| 320 | single_branch.address = exit_branch; | ||
| 321 | } else { | ||
| 322 | single_branch.address = branch_offset; | ||
| 323 | } | ||
| 324 | insert_label(state, branch_offset); | ||
| 325 | single_branch.kill = false; | ||
| 326 | single_branch.is_sync = false; | ||
| 327 | single_branch.is_brk = false; | ||
| 328 | single_branch.ignore = false; | ||
| 329 | parse_info.end_address = offset; | ||
| 330 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 331 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 332 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 333 | |||
| 334 | return {ParseResult::ControlCaught, parse_info}; | ||
| 335 | } | ||
| 336 | case OpCode::Id::SYNC: { | ||
| 337 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 338 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 339 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 340 | offset++; | ||
| 341 | continue; | ||
| 342 | } | ||
| 343 | const ConditionCode cc = instr.flow_condition_code; | ||
| 344 | single_branch.condition.cc = cc; | ||
| 345 | if (cc == ConditionCode::F) { | ||
| 346 | offset++; | ||
| 347 | continue; | ||
| 348 | } | ||
| 349 | single_branch.address = unassigned_branch; | ||
| 350 | single_branch.kill = false; | ||
| 351 | single_branch.is_sync = true; | ||
| 352 | single_branch.is_brk = false; | ||
| 353 | single_branch.ignore = false; | ||
| 354 | parse_info.end_address = offset; | ||
| 355 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 356 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 357 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 358 | |||
| 359 | return {ParseResult::ControlCaught, parse_info}; | ||
| 360 | } | ||
| 361 | case OpCode::Id::BRK: { | ||
| 362 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 363 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 364 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 365 | offset++; | ||
| 366 | continue; | ||
| 367 | } | ||
| 368 | const ConditionCode cc = instr.flow_condition_code; | ||
| 369 | single_branch.condition.cc = cc; | ||
| 370 | if (cc == ConditionCode::F) { | ||
| 371 | offset++; | ||
| 372 | continue; | ||
| 373 | } | ||
| 374 | single_branch.address = unassigned_branch; | ||
| 375 | single_branch.kill = false; | ||
| 376 | single_branch.is_sync = false; | ||
| 377 | single_branch.is_brk = true; | ||
| 378 | single_branch.ignore = false; | ||
| 379 | parse_info.end_address = offset; | ||
| 380 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 381 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 382 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 383 | |||
| 384 | return {ParseResult::ControlCaught, parse_info}; | ||
| 385 | } | ||
| 386 | case OpCode::Id::KIL: { | ||
| 387 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 388 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 389 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 390 | offset++; | ||
| 391 | continue; | ||
| 392 | } | ||
| 393 | const ConditionCode cc = instr.flow_condition_code; | ||
| 394 | single_branch.condition.cc = cc; | ||
| 395 | if (cc == ConditionCode::F) { | ||
| 396 | offset++; | ||
| 397 | continue; | ||
| 398 | } | ||
| 399 | single_branch.address = exit_branch; | ||
| 400 | single_branch.kill = true; | ||
| 401 | single_branch.is_sync = false; | ||
| 402 | single_branch.is_brk = false; | ||
| 403 | single_branch.ignore = false; | ||
| 404 | parse_info.end_address = offset; | ||
| 405 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 406 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 407 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 408 | |||
| 409 | return {ParseResult::ControlCaught, parse_info}; | ||
| 410 | } | ||
| 411 | case OpCode::Id::SSY: { | ||
| 412 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 413 | insert_label(state, target); | ||
| 414 | state.ssy_labels.emplace(offset, target); | ||
| 415 | break; | ||
| 416 | } | ||
| 417 | case OpCode::Id::PBK: { | ||
| 418 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 419 | insert_label(state, target); | ||
| 420 | state.pbk_labels.emplace(offset, target); | ||
| 421 | break; | ||
| 422 | } | ||
| 423 | case OpCode::Id::BRX: { | ||
| 424 | const auto tmp = TrackBranchIndirectInfo(state, offset); | ||
| 425 | if (!tmp) { | ||
| 426 | LOG_WARNING(HW_GPU, "BRX tracking unsuccessful"); | ||
| 427 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 428 | } | ||
| 429 | |||
| 430 | const auto result = *tmp; | ||
| 431 | const s32 pc_target = offset + result.relative_position; | ||
| 432 | std::vector<CaseBranch> branches; | ||
| 433 | for (u32 i = 0; i < result.entries; i++) { | ||
| 434 | auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); | ||
| 435 | if (!key) { | ||
| 436 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 437 | } | ||
| 438 | u32 value = *key; | ||
| 439 | u32 target = static_cast<u32>((value >> 3) + pc_target); | ||
| 440 | insert_label(state, target); | ||
| 441 | branches.emplace_back(value, target); | ||
| 442 | } | ||
| 443 | parse_info.end_address = offset; | ||
| 444 | parse_info.branch_info = MakeBranchInfo<MultiBranch>( | ||
| 445 | static_cast<u32>(instr.gpr8.Value()), std::move(branches)); | ||
| 446 | |||
| 447 | return {ParseResult::ControlCaught, parse_info}; | ||
| 448 | } | ||
| 449 | default: | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | |||
| 453 | offset++; | ||
| 454 | } | ||
| 455 | single_branch.kill = false; | ||
| 456 | single_branch.is_sync = false; | ||
| 457 | single_branch.is_brk = false; | ||
| 458 | parse_info.end_address = offset - 1; | ||
| 459 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 460 | single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, | ||
| 461 | single_branch.is_brk, single_branch.ignore); | ||
| 462 | return {ParseResult::BlockEnd, parse_info}; | ||
| 463 | } | ||
| 464 | |||
| 465 | bool TryInspectAddress(CFGRebuildState& state) { | ||
| 466 | if (state.inspect_queries.empty()) { | ||
| 467 | return false; | ||
| 468 | } | ||
| 469 | |||
| 470 | const u32 address = state.inspect_queries.front(); | ||
| 471 | state.inspect_queries.pop_front(); | ||
| 472 | const auto [result, block_index] = TryGetBlock(state, address); | ||
| 473 | switch (result) { | ||
| 474 | case BlockCollision::Found: { | ||
| 475 | return true; | ||
| 476 | } | ||
| 477 | case BlockCollision::Inside: { | ||
| 478 | // This case is the tricky one: | ||
| 479 | // We need to split the block into two separate blocks | ||
| 480 | const u32 end = state.block_info[block_index].end; | ||
| 481 | BlockInfo& new_block = CreateBlockInfo(state, address, end); | ||
| 482 | BlockInfo& current_block = state.block_info[block_index]; | ||
| 483 | current_block.end = address - 1; | ||
| 484 | new_block.branch = std::move(current_block.branch); | ||
| 485 | BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); | ||
| 486 | const auto branch = std::get_if<SingleBranch>(forward_branch.get()); | ||
| 487 | branch->address = address; | ||
| 488 | branch->ignore = true; | ||
| 489 | current_block.branch = std::move(forward_branch); | ||
| 490 | return true; | ||
| 491 | } | ||
| 492 | default: | ||
| 493 | break; | ||
| 494 | } | ||
| 495 | const auto [parse_result, parse_info] = ParseCode(state, address); | ||
| 496 | if (parse_result == ParseResult::AbnormalFlow) { | ||
| 497 | // On AbnormalFlow, return false to abort the CFG reconstruction | ||
| 498 | return false; | ||
| 499 | } | ||
| 500 | |||
| 501 | BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); | ||
| 502 | block_info.branch = parse_info.branch_info; | ||
| 503 | if (std::holds_alternative<SingleBranch>(*block_info.branch)) { | ||
| 504 | const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); | ||
| 505 | if (branch->condition.IsUnconditional()) { | ||
| 506 | return true; | ||
| 507 | } | ||
| 508 | const u32 fallthrough_address = parse_info.end_address + 1; | ||
| 509 | state.inspect_queries.push_front(fallthrough_address); | ||
| 510 | return true; | ||
| 511 | } | ||
| 512 | return true; | ||
| 513 | } | ||
| 514 | |||
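| | // Propagates the SSY/PBK stacks through one queued block and schedules queries | ||
| | // for its successors; returns false when two paths reach the same block with | ||
| | // inconsistent stacks, which aborts stack resolution. | ||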
| 515 | bool TryQuery(CFGRebuildState& state) { | ||
| 516 | const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, | ||
| 517 | BlockInfo& block) { | ||
| 518 | auto gather_start = labels.lower_bound(block.start); | ||
| 519 | const auto gather_end = labels.upper_bound(block.end); | ||
| 520 | while (gather_start != gather_end) { | ||
| 521 | cc.push(gather_start->second); | ||
| 522 | ++gather_start; | ||
| 523 | } | ||
| 524 | }; | ||
| 525 | if (state.queries.empty()) { | ||
| 526 | return false; | ||
| 527 | } | ||
| 528 | |||
| 529 | Query& q = state.queries.front(); | ||
| 530 | const u32 block_index = state.registered[q.address]; | ||
| 531 | BlockInfo& block = state.block_info[block_index]; | ||
| 532 | // If the block has already been visited, check that the stacks match. Otherwise, | ||
| 533 | // gather the ssy/pbk labels into the current stack and check whether the branch | ||
| 534 | // at the end of the block consumes a label. Schedule new queries accordingly. | ||
| 535 | if (block.visited) { | ||
| 536 | BlockStack& stack = state.stacks[q.address]; | ||
| 537 | const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && | ||
| 538 | (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); | ||
| 539 | state.queries.pop_front(); | ||
| 540 | return all_okay; | ||
| 541 | } | ||
| 542 | block.visited = true; | ||
| 543 | state.stacks.insert_or_assign(q.address, BlockStack{q}); | ||
| 544 | |||
| 545 | Query q2(q); | ||
| 546 | state.queries.pop_front(); | ||
| 547 | gather_labels(q2.ssy_stack, state.ssy_labels, block); | ||
| 548 | gather_labels(q2.pbk_stack, state.pbk_labels, block); | ||
| 549 | if (std::holds_alternative<SingleBranch>(*block.branch)) { | ||
| 550 | auto* branch = std::get_if<SingleBranch>(block.branch.get()); | ||
| 551 | if (!branch->condition.IsUnconditional()) { | ||
| 552 | q2.address = block.end + 1; | ||
| 553 | state.queries.push_back(q2); | ||
| 554 | } | ||
| 555 | |||
| 556 | auto& conditional_query = state.queries.emplace_back(q2); | ||
| 557 | if (branch->is_sync) { | ||
| 558 | if (branch->address == unassigned_branch) { | ||
| 559 | branch->address = conditional_query.ssy_stack.top(); | ||
| 560 | } | ||
| 561 | conditional_query.ssy_stack.pop(); | ||
| 562 | } | ||
| 563 | if (branch->is_brk) { | ||
| 564 | if (branch->address == unassigned_branch) { | ||
| 565 | branch->address = conditional_query.pbk_stack.top(); | ||
| 566 | } | ||
| 567 | conditional_query.pbk_stack.pop(); | ||
| 568 | } | ||
| 569 | conditional_query.address = branch->address; | ||
| 570 | return true; | ||
| 571 | } | ||
| 572 | |||
| 573 | const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||
| 574 | for (const auto& branch_case : multi_branch->branches) { | ||
| 575 | auto& conditional_query = state.queries.emplace_back(q2); | ||
| 576 | conditional_query.address = branch_case.address; | ||
| 577 | } | ||
| 578 | |||
| 579 | return true; | ||
| 580 | } | ||
| 581 | |||
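| | // Lowers a block's terminating branch into the AST: builds the condition | ||
| | // expression (condition code AND predicate, negating predicates above 7) and | ||
| | // emits a return, kill or goto per target. | ||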
| 582 | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { | ||
| 583 | const auto get_expr = [](const Condition& cond) -> Expr { | ||
| 584 | Expr result; | ||
| 585 | if (cond.cc != ConditionCode::T) { | ||
| 586 | result = MakeExpr<ExprCondCode>(cond.cc); | ||
| 587 | } | ||
| 588 | if (cond.predicate != Pred::UnusedIndex) { | ||
| 589 | u32 pred = static_cast<u32>(cond.predicate); | ||
| 590 | bool negate = false; | ||
| 591 | if (pred > 7) { | ||
| 592 | negate = true; | ||
| 593 | pred -= 8; | ||
| 594 | } | ||
| 595 | Expr extra = MakeExpr<ExprPredicate>(pred); | ||
| 596 | if (negate) { | ||
| 597 | extra = MakeExpr<ExprNot>(std::move(extra)); | ||
| 598 | } | ||
| 599 | if (result) { | ||
| 600 | return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); | ||
| 601 | } | ||
| 602 | return extra; | ||
| 603 | } | ||
| 604 | if (result) { | ||
| 605 | return result; | ||
| 606 | } | ||
| 607 | return MakeExpr<ExprBoolean>(true); | ||
| 608 | }; | ||
| 609 | |||
| 610 | if (std::holds_alternative<SingleBranch>(*branch_info)) { | ||
| 611 | const auto* branch = std::get_if<SingleBranch>(branch_info.get()); | ||
| 612 | if (branch->address < 0) { | ||
| 613 | if (branch->kill) { | ||
| 614 | mm.InsertReturn(get_expr(branch->condition), true); | ||
| 615 | return; | ||
| 616 | } | ||
| 617 | mm.InsertReturn(get_expr(branch->condition), false); | ||
| 618 | return; | ||
| 619 | } | ||
| 620 | mm.InsertGoto(get_expr(branch->condition), branch->address); | ||
| 621 | return; | ||
| 622 | } | ||
| 623 | const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); | ||
| 624 | for (const auto& branch_case : multi_branch->branches) { | ||
| 625 | mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), | ||
| 626 | branch_case.address); | ||
| 627 | } | ||
| 628 | } | ||
| 629 | |||
| 630 | void DecompileShader(CFGRebuildState& state) { | ||
| 631 | state.manager->Init(); | ||
| 632 | for (auto label : state.labels) { | ||
| 633 | state.manager->DeclareLabel(label); | ||
| 634 | } | ||
| 635 | for (const auto& block : state.block_info) { | ||
| 636 | if (state.labels.contains(block.start)) { | ||
| 637 | state.manager->InsertLabel(block.start); | ||
| 638 | } | ||
| 639 | const bool ignore = BlockBranchIsIgnored(block.branch); | ||
| 640 | const u32 end = ignore ? block.end + 1 : block.end; | ||
| 641 | state.manager->InsertBlock(block.start, end); | ||
| 642 | if (!ignore) { | ||
| 643 | InsertBranch(*state.manager, block.branch); | ||
| 644 | } | ||
| 645 | } | ||
| 646 | state.manager->Decompile(); | ||
| 647 | } | ||
| 648 | |||
| 649 | } // Anonymous namespace | ||
| 650 | |||
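| | // Control-flow scanning is staged: block discovery, then SSY/PBK stack | ||
| | // resolution, then AST decompilation; each stage that fails downgrades the | ||
| | // effective CompileDepth reported back to the caller. | ||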
| 651 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | ||
| 652 | const CompilerSettings& settings, | ||
| 653 | Registry& registry) { | ||
| 654 | auto result_out = std::make_unique<ShaderCharacteristics>(); | ||
| 655 | if (settings.depth == CompileDepth::BruteForce) { | ||
| 656 | result_out->settings.depth = CompileDepth::BruteForce; | ||
| 657 | return result_out; | ||
| 658 | } | ||
| 659 | |||
| 660 | CFGRebuildState state{program_code, start_address, registry}; | ||
| 661 | // Inspect Code and generate blocks | ||
| 662 | state.labels.clear(); | ||
| 663 | state.labels.emplace(start_address); | ||
| 664 | state.inspect_queries.push_back(state.start); | ||
| 665 | while (!state.inspect_queries.empty()) { | ||
| 666 | if (!TryInspectAddress(state)) { | ||
| 667 | result_out->settings.depth = CompileDepth::BruteForce; | ||
| 668 | return result_out; | ||
| 669 | } | ||
| 670 | } | ||
| 671 | |||
| 672 | bool use_flow_stack = true; | ||
| 673 | |||
| 674 | bool decompiled = false; | ||
| 675 | |||
| 676 | if (settings.depth != CompileDepth::FlowStack) { | ||
| 677 | // Decompile Stacks | ||
| 678 | state.queries.push_back(Query{state.start, {}, {}}); | ||
| 679 | decompiled = true; | ||
| 680 | while (!state.queries.empty()) { | ||
| 681 | if (!TryQuery(state)) { | ||
| 682 | decompiled = false; | ||
| 683 | break; | ||
| 684 | } | ||
| 685 | } | ||
| 686 | } | ||
| 687 | |||
| 688 | use_flow_stack = !decompiled; | ||
| 689 | |||
| 690 | // Sort and organize results | ||
| 691 | std::sort(state.block_info.begin(), state.block_info.end(), | ||
| 692 | [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); | ||
| 693 | if (decompiled && settings.depth != CompileDepth::NoFlowStack) { | ||
| 694 | ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, | ||
| 695 | settings.disable_else_derivation}; | ||
| 696 | state.manager = &manager; | ||
| 697 | DecompileShader(state); | ||
| 698 | decompiled = state.manager->IsFullyDecompiled(); | ||
| 699 | if (!decompiled) { | ||
| 700 | if (settings.depth == CompileDepth::FullDecompile) { | ||
| 701 | LOG_CRITICAL(HW_GPU, "Failed to remove all gotos:"); | ||
| 702 | } else { | ||
| 703 | LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos:"); | ||
| 704 | } | ||
| 705 | state.manager->ShowCurrentState("Of Shader"); | ||
| 706 | state.manager->Clear(); | ||
| 707 | } else { | ||
| 708 | auto characteristics = std::make_unique<ShaderCharacteristics>(); | ||
| 709 | characteristics->start = start_address; | ||
| 710 | characteristics->settings.depth = settings.depth; | ||
| 711 | characteristics->manager = std::move(manager); | ||
| 712 | characteristics->end = state.block_info.back().end + 1; | ||
| 713 | return characteristics; | ||
| 714 | } | ||
| 715 | } | ||
| 716 | |||
| 717 | result_out->start = start_address; | ||
| 718 | result_out->settings.depth = | ||
| 719 | use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; | ||
| 720 | result_out->blocks.clear(); | ||
| 721 | for (auto& block : state.block_info) { | ||
| 722 | ShaderBlock new_block{}; | ||
| 723 | new_block.start = block.start; | ||
| 724 | new_block.end = block.end; | ||
| 725 | new_block.ignore_branch = BlockBranchIsIgnored(block.branch); | ||
| 726 | if (!new_block.ignore_branch) { | ||
| 727 | new_block.branch = block.branch; | ||
| 728 | } | ||
| 729 | result_out->end = std::max(result_out->end, block.end); | ||
| 730 | result_out->blocks.push_back(new_block); | ||
| 731 | } | ||
| 732 | if (!use_flow_stack) { | ||
| 733 | result_out->labels = std::move(state.labels); | ||
| 734 | return result_out; | ||
| 735 | } | ||
| 736 | |||
| 737 | auto back = result_out->blocks.begin(); | ||
| 738 | auto next = std::next(back); | ||
| 739 | while (next != result_out->blocks.end()) { | ||
| 740 | if (!state.labels.contains(next->start) && next->start == back->end + 1) { | ||
| 741 | back->end = next->end; | ||
| 742 | next = result_out->blocks.erase(next); | ||
| 743 | continue; | ||
| 744 | } | ||
| 745 | back = next; | ||
| 746 | ++next; | ||
| 747 | } | ||
| 748 | |||
| 749 | return result_out; | ||
| 750 | } | ||
| 751 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null | |||
| @@ -1,117 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <list> | ||
| 8 | #include <optional> | ||
| 9 | #include <set> | ||
| 10 | #include <variant> | ||
| 11 | |||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/shader/ast.h" | ||
| 14 | #include "video_core/shader/compiler_settings.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::ConditionCode; | ||
| 21 | using Tegra::Shader::Pred; | ||
| 22 | |||
| 23 | constexpr s32 exit_branch = -1; | ||
| 24 | |||
| 25 | struct Condition { | ||
| 26 | Pred predicate{Pred::UnusedIndex}; | ||
| 27 | ConditionCode cc{ConditionCode::T}; | ||
| 28 | |||
| 29 | bool IsUnconditional() const { | ||
| 30 | return predicate == Pred::UnusedIndex && cc == ConditionCode::T; | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator==(const Condition& other) const { | ||
| 34 | return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool operator!=(const Condition& other) const { | ||
| 38 | return !operator==(other); | ||
| 39 | } | ||
| 40 | }; | ||
| 41 | |||
| 42 | class SingleBranch { | ||
| 43 | public: | ||
| 44 | SingleBranch() = default; | ||
| 45 | explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, | ||
| 46 | bool is_brk_, bool ignore_) | ||
| 47 | : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, | ||
| 48 | ignore{ignore_} {} | ||
| 49 | |||
| 50 | bool operator==(const SingleBranch& b) const { | ||
| 51 | return std::tie(condition, address, kill, is_sync, is_brk, ignore) == | ||
| 52 | std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); | ||
| 53 | } | ||
| 54 | |||
| 55 | bool operator!=(const SingleBranch& b) const { | ||
| 56 | return !operator==(b); | ||
| 57 | } | ||
| 58 | |||
| 59 | Condition condition{}; | ||
| 60 | s32 address{exit_branch}; | ||
| 61 | bool kill{}; | ||
| 62 | bool is_sync{}; | ||
| 63 | bool is_brk{}; | ||
| 64 | bool ignore{}; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct CaseBranch { | ||
| 68 | explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} | ||
| 69 | u32 cmp_value; | ||
| 70 | u32 address; | ||
| 71 | }; | ||
| 72 | |||
| 73 | class MultiBranch { | ||
| 74 | public: | ||
| 75 | explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_) | ||
| 76 | : gpr{gpr_}, branches{std::move(branches_)} {} | ||
| 77 | |||
| 78 | u32 gpr{}; | ||
| 79 | std::vector<CaseBranch> branches{}; | ||
| 80 | }; | ||
| 81 | |||
| 82 | using BranchData = std::variant<SingleBranch, MultiBranch>; | ||
| 83 | using BlockBranchInfo = std::shared_ptr<BranchData>; | ||
| 84 | |||
| 85 | bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); | ||
| 86 | |||
| 87 | struct ShaderBlock { | ||
| 88 | u32 start{}; | ||
| 89 | u32 end{}; | ||
| 90 | bool ignore_branch{}; | ||
| 91 | BlockBranchInfo branch{}; | ||
| 92 | |||
| 93 | bool operator==(const ShaderBlock& sb) const { | ||
| 94 | return std::tie(start, end, ignore_branch) == | ||
| 95 | std::tie(sb.start, sb.end, sb.ignore_branch) && | ||
| 96 | BlockBranchInfoAreEqual(branch, sb.branch); | ||
| 97 | } | ||
| 98 | |||
| 99 | bool operator!=(const ShaderBlock& sb) const { | ||
| 100 | return !operator==(sb); | ||
| 101 | } | ||
| 102 | }; | ||
| 103 | |||
| 104 | struct ShaderCharacteristics { | ||
| 105 | std::list<ShaderBlock> blocks{}; | ||
| 106 | std::set<u32> labels{}; | ||
| 107 | u32 start{}; | ||
| 108 | u32 end{}; | ||
| 109 | ASTManager manager{true, true}; | ||
| 110 | CompilerSettings settings{}; | ||
| 111 | }; | ||
| 112 | |||
| 113 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | ||
| 114 | const CompilerSettings& settings, | ||
| 115 | Registry& registry); | ||
| 116 | |||
| 117 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null | |||
| @@ -1,368 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <limits> | ||
| 7 | #include <set> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/engines/shader_header.h" | ||
| 15 | #include "video_core/shader/control_flow.h" | ||
| 16 | #include "video_core/shader/memory_util.h" | ||
| 17 | #include "video_core/shader/node_helper.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | using Tegra::Shader::Instruction; | ||
| 23 | using Tegra::Shader::OpCode; | ||
| 24 | |||
| 25 | namespace { | ||
| 26 | |||
| 27 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, | ||
| 28 | const std::list<SamplerEntry>& used_samplers) { | ||
| 29 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 30 | return; | ||
| 31 | } | ||
| 32 | u32 count{}; | ||
| 33 | std::vector<u32> bound_offsets; | ||
| 34 | for (const auto& sampler : used_samplers) { | ||
| 35 | if (sampler.is_bindless) { | ||
| 36 | continue; | ||
| 37 | } | ||
| 38 | ++count; | ||
| 39 | bound_offsets.emplace_back(sampler.offset); | ||
| 40 | } | ||
| 41 | if (count > 1) { | ||
| 42 | gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, | ||
| 47 | VideoCore::GuestDriverProfile& gpu_driver, | ||
| 48 | const std::list<SamplerEntry>& used_samplers) { | ||
| 49 | const u32 base_offset = sampler_to_deduce.offset; | ||
| 50 | u32 max_offset{std::numeric_limits<u32>::max()}; | ||
| 51 | for (const auto& sampler : used_samplers) { | ||
| 52 | if (sampler.is_bindless) { | ||
| 53 | continue; | ||
| 54 | } | ||
| 55 | if (sampler.offset > base_offset) { | ||
| 56 | max_offset = std::min(sampler.offset, max_offset); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | if (max_offset == std::numeric_limits<u32>::max()) { | ||
| 60 | return std::nullopt; | ||
| 61 | } | ||
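| | // The offsets appear to be in 32-bit words while the handler size is in bytes, | ||
| | // hence the * 4; this interpretation is an assumption based on this call site. | ||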
| 62 | return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); | ||
| 63 | } | ||
| 64 | |||
| 65 | } // Anonymous namespace | ||
| 66 | |||
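| | // Visitor that walks the decompiled AST and lowers every still-encoded block | ||
| | // (ASTBlockEncoded) into IR by decoding its instruction range; leaf node types | ||
| | // are visited as no-ops. | ||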
| 67 | class ASTDecoder { | ||
| 68 | public: | ||
| 69 | explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} | ||
| 70 | |||
| 71 | void operator()(ASTProgram& ast) { | ||
| 72 | ASTNode current = ast.nodes.GetFirst(); | ||
| 73 | while (current) { | ||
| 74 | Visit(current); | ||
| 75 | current = current->GetNext(); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | void operator()(ASTIfThen& ast) { | ||
| 80 | ASTNode current = ast.nodes.GetFirst(); | ||
| 81 | while (current) { | ||
| 82 | Visit(current); | ||
| 83 | current = current->GetNext(); | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | void operator()(ASTIfElse& ast) { | ||
| 88 | ASTNode current = ast.nodes.GetFirst(); | ||
| 89 | while (current) { | ||
| 90 | Visit(current); | ||
| 91 | current = current->GetNext(); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | void operator()(ASTBlockEncoded& ast) {} | ||
| 96 | |||
| 97 | void operator()(ASTBlockDecoded& ast) {} | ||
| 98 | |||
| 99 | void operator()(ASTVarSet& ast) {} | ||
| 100 | |||
| 101 | void operator()(ASTLabel& ast) {} | ||
| 102 | |||
| 103 | void operator()(ASTGoto& ast) {} | ||
| 104 | |||
| 105 | void operator()(ASTDoWhile& ast) { | ||
| 106 | ASTNode current = ast.nodes.GetFirst(); | ||
| 107 | while (current) { | ||
| 108 | Visit(current); | ||
| 109 | current = current->GetNext(); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | void operator()(ASTReturn& ast) {} | ||
| 114 | |||
| 115 | void operator()(ASTBreak& ast) {} | ||
| 116 | |||
| 117 | void Visit(ASTNode& node) { | ||
| 118 | std::visit(*this, *node->GetInnerData()); | ||
| 119 | if (node->IsBlockEncoded()) { | ||
| 120 | auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); | ||
| 121 | NodeBlock bb = ir.DecodeRange(block->start, block->end); | ||
| 122 | node->TransformBlockEncoded(std::move(bb)); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | private: | ||
| 127 | ShaderIR& ir; | ||
| 128 | }; | ||
| 129 | |||
| 130 | void ShaderIR::Decode() { | ||
| 131 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||
| 132 | |||
| 133 | decompiled = false; | ||
| 134 | auto info = ScanFlow(program_code, main_offset, settings, registry); | ||
| 135 | auto& shader_info = *info; | ||
| 136 | coverage_begin = shader_info.start; | ||
| 137 | coverage_end = shader_info.end; | ||
| 138 | switch (shader_info.settings.depth) { | ||
| 139 | case CompileDepth::FlowStack: { | ||
| 140 | for (const auto& block : shader_info.blocks) { | ||
| 141 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 142 | } | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | case CompileDepth::NoFlowStack: { | ||
| 146 | disable_flow_stack = true; | ||
| 147 | const auto insert_block = [this](NodeBlock& nodes, u32 label) { | ||
| 148 | if (label == static_cast<u32>(exit_branch)) { | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | basic_blocks.insert({label, nodes}); | ||
| 152 | }; | ||
| 153 | const auto& blocks = shader_info.blocks; | ||
| 154 | NodeBlock current_block; | ||
| 155 | u32 current_label = static_cast<u32>(exit_branch); | ||
| 156 | for (const auto& block : blocks) { | ||
| 157 | if (shader_info.labels.contains(block.start)) { | ||
| 158 | insert_block(current_block, current_label); | ||
| 159 | current_block.clear(); | ||
| 160 | current_label = block.start; | ||
| 161 | } | ||
| 162 | if (!block.ignore_branch) { | ||
| 163 | DecodeRangeInner(current_block, block.start, block.end); | ||
| 164 | InsertControlFlow(current_block, block); | ||
| 165 | } else { | ||
| 166 | DecodeRangeInner(current_block, block.start, block.end + 1); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | insert_block(current_block, current_label); | ||
| 170 | break; | ||
| 171 | } | ||
| 172 | case CompileDepth::DecompileBackwards: | ||
| 173 | case CompileDepth::FullDecompile: { | ||
| 174 | program_manager = std::move(shader_info.manager); | ||
| 175 | disable_flow_stack = true; | ||
| 176 | decompiled = true; | ||
| 177 | ASTDecoder decoder{*this}; | ||
| 178 | ASTNode program = GetASTProgram(); | ||
| 179 | decoder.Visit(program); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | default: | ||
| 183 | LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); | ||
| 184 | [[fallthrough]]; | ||
| 185 | case CompileDepth::BruteForce: { | ||
| 186 | const auto shader_end = static_cast<u32>(program_code.size()); | ||
| 187 | coverage_begin = main_offset; | ||
| 188 | coverage_end = shader_end; | ||
| 189 | for (u32 label = main_offset; label < shader_end; ++label) { | ||
| 190 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 191 | } | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | if (settings.depth != shader_info.settings.depth) { | ||
| 196 | LOG_WARNING( | ||
| 197 | HW_GPU, "Decompiling at depth \"{}\" failed, downgrading to \"{}\"", | ||
| 198 | CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | ||
| 203 | NodeBlock basic_block; | ||
| 204 | DecodeRangeInner(basic_block, begin, end); | ||
| 205 | return basic_block; | ||
| 206 | } | ||
| 207 | |||
| 208 | void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { | ||
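| | // If begin > end, the range is treated as open-ended and decoding proceeds up | ||
| | // to MAX_PROGRAM_LENGTH (assumption: this guards against a wrapped or unknown | ||
| | // end address rather than skipping the block entirely). | ||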
| 209 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 210 | pc = DecodeInstr(bb, pc); | ||
| 211 | } | ||
| 212 | } | ||
| 213 | |||
| 214 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | ||
| 215 | const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { | ||
| 216 | Node result = n; | ||
| 217 | if (cond.cc != ConditionCode::T) { | ||
| 218 | result = Conditional(GetConditionCode(cond.cc), {result}); | ||
| 219 | } | ||
| 220 | if (cond.predicate != Pred::UnusedIndex) { | ||
| 221 | u32 pred = static_cast<u32>(cond.predicate); | ||
| 222 | const bool is_neg = pred > 7; | ||
| 223 | if (is_neg) { | ||
| 224 | pred -= 8; | ||
| 225 | } | ||
| 226 | result = Conditional(GetPredicate(pred, is_neg), {result}); | ||
| 227 | } | ||
| 228 | return result; | ||
| 229 | }; | ||
| 230 | if (std::holds_alternative<SingleBranch>(*block.branch)) { | ||
| 231 | auto branch = std::get_if<SingleBranch>(block.branch.get()); | ||
| 232 | if (branch->address < 0) { | ||
| 233 | if (branch->kill) { | ||
| 234 | Node n = Operation(OperationCode::Discard); | ||
| 235 | n = apply_conditions(branch->condition, n); | ||
| 236 | bb.push_back(n); | ||
| 237 | global_code.push_back(n); | ||
| 238 | return; | ||
| 239 | } | ||
| 240 | Node n = Operation(OperationCode::Exit); | ||
| 241 | n = apply_conditions(branch->condition, n); | ||
| 242 | bb.push_back(n); | ||
| 243 | global_code.push_back(n); | ||
| 244 | return; | ||
| 245 | } | ||
| 246 | Node n = Operation(OperationCode::Branch, Immediate(branch->address)); | ||
| 247 | n = apply_conditions(branch->condition, n); | ||
| 248 | bb.push_back(n); | ||
| 249 | global_code.push_back(n); | ||
| 250 | return; | ||
| 251 | } | ||
| 252 | auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||
| 253 | Node op_a = GetRegister(multi_branch->gpr); | ||
| 254 | for (auto& branch_case : multi_branch->branches) { | ||
| 255 | Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); | ||
| 256 | Node op_b = Immediate(branch_case.cmp_value); | ||
| 257 | Node condition = | ||
| 258 | GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); | ||
| 259 | auto result = Conditional(condition, {n}); | ||
| 260 | bb.push_back(result); | ||
| 261 | global_code.push_back(result); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
| 265 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | ||
| 266 | // Ignore sched instructions when generating code. | ||
| 267 | if (IsSchedInstruction(pc, main_offset)) { | ||
| 268 | return pc + 1; | ||
| 269 | } | ||
| 270 | |||
| 271 | const Instruction instr = {program_code[pc]}; | ||
| 272 | const auto opcode = OpCode::Decode(instr); | ||
| 273 | const u32 nv_address = ConvertAddressToNvidiaSpace(pc); | ||
| 274 | |||
| 275 | // Decoding failure | ||
| 276 | if (!opcode) { | ||
| 277 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | ||
| 278 | bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", | ||
| 279 | nv_address, instr.value))); | ||
| 280 | return pc + 1; | ||
| 281 | } | ||
| 282 | |||
| 283 | bb.push_back(Comment( | ||
| 284 | fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); | ||
| 285 | |||
| 286 | using Tegra::Shader::Pred; | ||
| 287 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||
| 288 | "NeverExecute predicate not implemented"); | ||
| 289 | |||
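| | // Dispatch table from opcode type to decoder member function; any type not | ||
| | // listed falls through to DecodeOther below. | ||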
| 290 | static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { | ||
| 291 | {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, | ||
| 292 | {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, | ||
| 293 | {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, | ||
| 294 | {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, | ||
| 295 | {OpCode::Type::Shift, &ShaderIR::DecodeShift}, | ||
| 296 | {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, | ||
| 297 | {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, | ||
| 298 | {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, | ||
| 299 | {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, | ||
| 300 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, | ||
| 301 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | ||
| 302 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | ||
| 303 | {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, | ||
| 304 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | ||
| 305 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, | ||
| 306 | {OpCode::Type::Image, &ShaderIR::DecodeImage}, | ||
| 307 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, | ||
| 308 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, | ||
| 309 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, | ||
| 310 | {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, | ||
| 311 | {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, | ||
| 312 | {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, | ||
| 313 | {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, | ||
| 314 | {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, | ||
| 315 | {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, | ||
| 316 | {OpCode::Type::Video, &ShaderIR::DecodeVideo}, | ||
| 317 | {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, | ||
| 318 | }; | ||
| 319 | |||
| 320 | std::vector<Node> tmp_block; | ||
| 321 | if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { | ||
| 322 | pc = (this->*decoder->second)(tmp_block, pc); | ||
| 323 | } else { | ||
| 324 | pc = DecodeOther(tmp_block, pc); | ||
| 325 | } | ||
| 326 | |||
| 327 | // Some instructions (like SSY) don't have a predicate field; they are always | ||
| 328 | // executed unconditionally. | ||
| 329 | const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); | ||
| 330 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 331 | |||
| 332 | if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { | ||
| 333 | const Node conditional = | ||
| 334 | Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); | ||
| 335 | global_code.push_back(conditional); | ||
| 336 | bb.push_back(conditional); | ||
| 337 | } else { | ||
| 338 | for (auto& node : tmp_block) { | ||
| 339 | global_code.push_back(node); | ||
| 340 | bb.push_back(node); | ||
| 341 | } | ||
| 342 | } | ||
| 343 | |||
| 344 | return pc + 1; | ||
| 345 | } | ||
| 346 | |||
| 347 | void ShaderIR::PostDecode() { | ||
| 348 | // Deduce texture handler size if needed | ||
| 349 | auto gpu_driver = registry.AccessGuestDriverProfile(); | ||
| 350 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | ||
| 351 | // Deduce Indexed Samplers | ||
| 352 | if (!uses_indexed_samplers) { | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | for (auto& sampler : used_samplers) { | ||
| 356 | if (!sampler.is_indexed) { | ||
| 357 | continue; | ||
| 358 | } | ||
| 359 | if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { | ||
| 360 | sampler.size = *size; | ||
| 361 | } else { | ||
| 362 | LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); | ||
| 363 | sampler.size = 1; | ||
| 364 | } | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null | |||
| @@ -1,166 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::SubOp; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | ||
| 23 | |||
| 24 | Node op_b = [&] { | ||
| 25 | if (instr.is_b_imm) { | ||
| 26 | return GetImmediate19(instr); | ||
| 27 | } else if (instr.is_b_gpr) { | ||
| 28 | return GetRegister(instr.gpr20); | ||
| 29 | } else { | ||
| 30 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 31 | } | ||
| 32 | }(); | ||
| 33 | |||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::MOV_C: | ||
| 36 | case OpCode::Id::MOV_R: { | ||
| 37 | // MOV has neither 'abs' nor 'neg' bits. | ||
| 38 | SetRegister(bb, instr.gpr0, op_b); | ||
| 39 | break; | ||
| 40 | } | ||
| 41 | case OpCode::Id::FMUL_C: | ||
| 42 | case OpCode::Id::FMUL_R: | ||
| 43 | case OpCode::Id::FMUL_IMM: { | ||
| 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | ||
| 45 | if (instr.fmul.tab5cb8_2 != 0) { | ||
| 46 | LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", | ||
| 47 | instr.fmul.tab5cb8_2.Value()); | ||
| 48 | } | ||
| 49 | if (instr.fmul.tab5c68_0 != 1) { | ||
| 50 | LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented", | ||
| 51 | instr.fmul.tab5c68_0.Value()); | ||
| 52 | } | ||
| 53 | |||
| 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | ||
| 55 | |||
| 56 | static constexpr std::array FmulPostFactor = { | ||
| 57 | 1.000f, // None | ||
| 58 | 0.500f, // Divide 2 | ||
| 59 | 0.250f, // Divide 4 | ||
| 60 | 0.125f, // Divide 8 | ||
| 61 | 8.000f, // Mul 8 | ||
| 62 | 4.000f, // Mul 4 | ||
| 63 | 2.000f, // Mul 2 | ||
| 64 | }; | ||
| 65 | |||
| 66 | if (instr.fmul.postfactor != 0) { | ||
| 67 | op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, | ||
| 68 | Immediate(FmulPostFactor[instr.fmul.postfactor])); | ||
| 69 | } | ||
| 70 | |||
| 71 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | ||
| 72 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | ||
| 73 | |||
| 74 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 75 | |||
| 76 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 77 | SetRegister(bb, instr.gpr0, value); | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | case OpCode::Id::FADD_C: | ||
| 81 | case OpCode::Id::FADD_R: | ||
| 82 | case OpCode::Id::FADD_IMM: { | ||
| 83 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 84 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 85 | |||
| 86 | Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 87 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 88 | |||
| 89 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 90 | SetRegister(bb, instr.gpr0, value); | ||
| 91 | break; | ||
| 92 | } | ||
| 93 | case OpCode::Id::MUFU: { | ||
| 94 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 95 | |||
| 96 | Node value = [&]() { | ||
| 97 | switch (instr.sub_op) { | ||
| 98 | case SubOp::Cos: | ||
| 99 | return Operation(OperationCode::FCos, PRECISE, op_a); | ||
| 100 | case SubOp::Sin: | ||
| 101 | return Operation(OperationCode::FSin, PRECISE, op_a); | ||
| 102 | case SubOp::Ex2: | ||
| 103 | return Operation(OperationCode::FExp2, PRECISE, op_a); | ||
| 104 | case SubOp::Lg2: | ||
| 105 | return Operation(OperationCode::FLog2, PRECISE, op_a); | ||
| 106 | case SubOp::Rcp: | ||
| 107 | return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); | ||
| 108 | case SubOp::Rsq: | ||
| 109 | return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); | ||
| 110 | case SubOp::Sqrt: | ||
| 111 | return Operation(OperationCode::FSqrt, PRECISE, op_a); | ||
| 112 | default: | ||
| 113 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); | ||
| 114 | return Immediate(0); | ||
| 115 | } | ||
| 116 | }(); | ||
| 117 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 118 | |||
| 119 | SetRegister(bb, instr.gpr0, value); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::FMNMX_C: | ||
| 123 | case OpCode::Id::FMNMX_R: | ||
| 124 | case OpCode::Id::FMNMX_IMM: { | ||
| 125 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 126 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 127 | |||
| 128 | const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 129 | |||
| 130 | const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); | ||
| 131 | const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); | ||
| 132 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 133 | |||
| 134 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 135 | SetRegister(bb, instr.gpr0, value); | ||
| 136 | break; | ||
| 137 | } | ||
| 138 | case OpCode::Id::FCMP_RR: | ||
| 139 | case OpCode::Id::FCMP_RC: | ||
| 140 | case OpCode::Id::FCMP_IMMR: { | ||
| 141 | UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); | ||
| 142 | Node op_c = GetRegister(instr.gpr39); | ||
| 143 | Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); | ||
| 144 | SetRegister( | ||
| 145 | bb, instr.gpr0, | ||
| 146 | Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | case OpCode::Id::RRO_C: | ||
| 150 | case OpCode::Id::RRO_R: | ||
| 151 | case OpCode::Id::RRO_IMM: { | ||
| 152 | LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); | ||
| 153 | |||
| 154 | // Currently RRO is only implemented as a register move. | ||
| 155 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 156 | SetRegister(bb, instr.gpr0, op_b); | ||
| 157 | break; | ||
| 158 | } | ||
| 159 | default: | ||
| 160 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 161 | } | ||
| 162 | |||
| 163 | return pc; | ||
| 164 | } | ||
| 165 | |||
| 166 | } // namespace VideoCommon::Shader | ||
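
The MUFU case above reduces every hardware sub-op to a generic IR opcode. As a host-side reference of the same semantics, a minimal plain-C++ sketch (the enum and helper are hypothetical, not yuzu API):

    #include <cmath>

    enum class SubOp { Cos, Sin, Ex2, Lg2, Rcp, Rsq, Sqrt };

    // Host-side reference for the MUFU lowering: each sub-op reduces to a
    // standard math function. Rcp is written as a 1.0f / x division, matching
    // the FDiv(Immediate(1.0f), op_a) node emitted by the decoder; saturation
    // (GetSaturatedFloat) would clamp the result to [0, 1] afterwards.
    float MufuReference(SubOp op, float x) {
        switch (op) {
        case SubOp::Cos:  return std::cos(x);
        case SubOp::Sin:  return std::sin(x);
        case SubOp::Ex2:  return std::exp2(x);
        case SubOp::Lg2:  return std::log2(x);
        case SubOp::Rcp:  return 1.0f / x;
        case SubOp::Rsq:  return 1.0f / std::sqrt(x);
        case SubOp::Sqrt: return std::sqrt(x);
        }
        return 0.0f; // unhandled sub-ops fall back to 0, as in the decoder
    }
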
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
deleted file mode 100644
index 88103fede..000000000
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::HalfType; | ||
| 15 | using Tegra::Shader::Instruction; | ||
| 16 | using Tegra::Shader::OpCode; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | bool negate_a = false; | ||
| 23 | bool negate_b = false; | ||
| 24 | bool absolute_a = false; | ||
| 25 | bool absolute_b = false; | ||
| 26 | |||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::HADD2_R: | ||
| 29 | if (instr.alu_half.ftz == 0) { | ||
| 30 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 31 | } | ||
| 32 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 33 | negate_b = ((instr.value >> 31) & 1) != 0; | ||
| 34 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 35 | absolute_b = ((instr.value >> 30) & 1) != 0; | ||
| 36 | break; | ||
| 37 | case OpCode::Id::HADD2_C: | ||
| 38 | if (instr.alu_half.ftz == 0) { | ||
| 39 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 40 | } | ||
| 41 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 42 | negate_b = ((instr.value >> 56) & 1) != 0; | ||
| 43 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 44 | absolute_b = ((instr.value >> 54) & 1) != 0; | ||
| 45 | break; | ||
| 46 | case OpCode::Id::HMUL2_R: | ||
| 47 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 48 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 49 | absolute_b = ((instr.value >> 30) & 1) != 0; | ||
| 50 | break; | ||
| 51 | case OpCode::Id::HMUL2_C: | ||
| 52 | negate_b = ((instr.value >> 31) & 1) != 0; | ||
| 53 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 54 | absolute_b = ((instr.value >> 54) & 1) != 0; | ||
| 55 | break; | ||
| 56 | default: | ||
| 57 | UNREACHABLE(); | ||
| 58 | break; | ||
| 59 | } | ||
| 60 | |||
| 61 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); | ||
| 62 | op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); | ||
| 63 | |||
| 64 | auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { | ||
| 65 | switch (opcode->get().GetId()) { | ||
| 66 | case OpCode::Id::HADD2_C: | ||
| 67 | case OpCode::Id::HMUL2_C: | ||
| 68 | return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 69 | case OpCode::Id::HADD2_R: | ||
| 70 | case OpCode::Id::HMUL2_R: | ||
| 71 | return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; | ||
| 72 | default: | ||
| 73 | UNREACHABLE(); | ||
| 74 | return {HalfType::F32, Immediate(0)}; | ||
| 75 | } | ||
| 76 | }(); | ||
| 77 | op_b = UnpackHalfFloat(op_b, type_b); | ||
| 78 | op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); | ||
| 79 | |||
| 80 | Node value = [this, opcode, op_a, op_b = op_b] { | ||
| 81 | switch (opcode->get().GetId()) { | ||
| 82 | case OpCode::Id::HADD2_C: | ||
| 83 | case OpCode::Id::HADD2_R: | ||
| 84 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); | ||
| 85 | case OpCode::Id::HMUL2_C: | ||
| 86 | case OpCode::Id::HMUL2_R: | ||
| 87 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); | ||
| 88 | default: | ||
| 89 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | ||
| 90 | return Immediate(0); | ||
| 91 | } | ||
| 92 | }(); | ||
| 93 | value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); | ||
| 94 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); | ||
| 95 | |||
| 96 | SetRegister(bb, instr.gpr0, value); | ||
| 97 | |||
| 98 | return pc; | ||
| 99 | } | ||
| 100 | |||
| 101 | } // namespace VideoCommon::Shader | ||
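
HADD2/HMUL2 work on two fp16 values packed into one 32-bit register: the decoder unpacks both lanes (UnpackHalfFloat), applies per-lane abs/negate, performs the operation, saturates, and merges the result back over gpr0. A rough scalar sketch of the packed add, assuming the non-standard _Float16 extension (recent GCC/Clang) and a little-endian host:

    #include <cstdint>
    #include <cstring>

    // Packed-half reference for HADD2: lane 0 lives in the low 16 bits,
    // lane 1 in the high 16 bits (little-endian assumption).
    uint32_t Hadd2(uint32_t ra, uint32_t rb) {
        _Float16 a[2];
        _Float16 b[2];
        std::memcpy(a, &ra, sizeof(a));
        std::memcpy(b, &rb, sizeof(b));
        a[0] = a[0] + b[0];
        a[1] = a[1] + b[1];
        uint32_t result;
        std::memcpy(&result, a, sizeof(result));
        return result;
    }

The real decoder's HalfMerge can additionally preserve one lane of the old gpr0 value, which this sketch omits.
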
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
deleted file mode 100644
index d179b9873..000000000
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 22 | if (instr.alu_half_imm.ftz == 0) { | ||
| 23 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 24 | } | ||
| 25 | } else { | ||
| 26 | if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { | ||
| 27 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); | ||
| 32 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); | ||
| 33 | |||
| 34 | const Node op_b = UnpackHalfImmediate(instr, true); | ||
| 35 | |||
| 36 | Node value = [&]() { | ||
| 37 | switch (opcode->get().GetId()) { | ||
| 38 | case OpCode::Id::HADD2_IMM: | ||
| 39 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); | ||
| 40 | case OpCode::Id::HMUL2_IMM: | ||
| 41 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); | ||
| 42 | default: | ||
| 43 | UNREACHABLE(); | ||
| 44 | return Immediate(0); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | |||
| 48 | value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); | ||
| 49 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 50 | SetRegister(bb, instr.gpr0, value); | ||
| 51 | return pc; | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
deleted file mode 100644
index f1875967c..000000000
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::MOV32_IMM: { | ||
| 22 | SetRegister(bb, instr.gpr0, GetImmediate32(instr)); | ||
| 23 | break; | ||
| 24 | } | ||
| 25 | case OpCode::Id::FMUL32_IMM: { | ||
| 26 | Node value = | ||
| 27 | Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); | ||
| 28 | value = GetSaturatedFloat(value, instr.fmul32.saturate); | ||
| 29 | |||
| 30 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 31 | SetRegister(bb, instr.gpr0, value); | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | case OpCode::Id::FADD32I: { | ||
| 35 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, | ||
| 36 | instr.fadd32i.negate_a); | ||
| 37 | const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, | ||
| 38 | instr.fadd32i.negate_b); | ||
| 39 | |||
| 40 | const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 41 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 42 | SetRegister(bb, instr.gpr0, value); | ||
| 43 | break; | ||
| 44 | } | ||
| 45 | default: | ||
| 46 | UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", | ||
| 47 | opcode->get().GetName()); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
deleted file mode 100644
index 7b5bb7003..000000000
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ /dev/null
@@ -1,375 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::IAdd3Height; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::Register; | ||
| 18 | |||
| 19 | u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { | ||
| 20 | const Instruction instr = {program_code[pc]}; | ||
| 21 | const auto opcode = OpCode::Decode(instr); | ||
| 22 | |||
| 23 | Node op_a = GetRegister(instr.gpr8); | ||
| 24 | Node op_b = [&]() { | ||
| 25 | if (instr.is_b_imm) { | ||
| 26 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 27 | } else if (instr.is_b_gpr) { | ||
| 28 | return GetRegister(instr.gpr20); | ||
| 29 | } else { | ||
| 30 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 31 | } | ||
| 32 | }(); | ||
| 33 | |||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::IADD_C: | ||
| 36 | case OpCode::Id::IADD_R: | ||
| 37 | case OpCode::Id::IADD_IMM: { | ||
| 38 | UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); | ||
| 39 | UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); | ||
| 40 | |||
| 41 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 42 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 43 | |||
| 44 | Node value = Operation(OperationCode::UAdd, op_a, op_b); | ||
| 45 | |||
| 46 | if (instr.iadd.x) { | ||
| 47 | Node carry = GetInternalFlag(InternalFlag::Carry); | ||
| 48 | Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); | ||
| 49 | value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); | ||
| 50 | } | ||
| 51 | |||
| 52 | if (instr.generates_cc) { | ||
| 53 | const Node i0 = Immediate(0); | ||
| 54 | |||
| 55 | Node zero = Operation(OperationCode::LogicalIEqual, value, i0); | ||
| 56 | Node sign = Operation(OperationCode::LogicalILessThan, value, i0); | ||
| 57 | Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); | ||
| 58 | |||
| 59 | Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); | ||
| 60 | Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); | ||
| 61 | Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); | ||
| 62 | Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); | ||
| 63 | |||
| 64 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); | ||
| 65 | SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); | ||
| 66 | SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); | ||
| 67 | SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); | ||
| 68 | } | ||
| 69 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | case OpCode::Id::IADD3_C: | ||
| 73 | case OpCode::Id::IADD3_R: | ||
| 74 | case OpCode::Id::IADD3_IMM: { | ||
| 75 | Node op_c = GetRegister(instr.gpr39); | ||
| 76 | |||
| 77 | const auto ApplyHeight = [&](IAdd3Height height, Node value) { | ||
| 78 | switch (height) { | ||
| 79 | case IAdd3Height::None: | ||
| 80 | return value; | ||
| 81 | case IAdd3Height::LowerHalfWord: | ||
| 82 | return BitfieldExtract(value, 0, 16); | ||
| 83 | case IAdd3Height::UpperHalfWord: | ||
| 84 | return BitfieldExtract(value, 16, 16); | ||
| 85 | default: | ||
| 86 | UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); | ||
| 87 | return Immediate(0); | ||
| 88 | } | ||
| 89 | }; | ||
| 90 | |||
| 91 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 92 | op_a = ApplyHeight(instr.iadd3.height_a, op_a); | ||
| 93 | op_b = ApplyHeight(instr.iadd3.height_b, op_b); | ||
| 94 | op_c = ApplyHeight(instr.iadd3.height_c, op_c); | ||
| 95 | } | ||
| 96 | |||
| 97 | op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); | ||
| 98 | op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); | ||
| 99 | op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); | ||
| 100 | |||
| 101 | const Node value = [&] { | ||
| 102 | Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); | ||
| 103 | if (opcode->get().GetId() != OpCode::Id::IADD3_R) { | ||
| 104 | return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); | ||
| 105 | } | ||
| 106 | const Node shifted = [&] { | ||
| 107 | switch (instr.iadd3.mode) { | ||
| 108 | case Tegra::Shader::IAdd3Mode::RightShift: | ||
| 109 | // TODO(tech4me): According to | ||
| 110 | // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 | ||
| 111 | // the addition between op_a and op_b should be done in uint33; more | ||
| 112 | // investigation is required. | ||
| 113 | return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, | ||
| 114 | Immediate(16)); | ||
| 115 | case Tegra::Shader::IAdd3Mode::LeftShift: | ||
| 116 | return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, | ||
| 117 | Immediate(16)); | ||
| 118 | default: | ||
| 119 | return add_ab; | ||
| 120 | } | ||
| 121 | }(); | ||
| 122 | return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); | ||
| 123 | }(); | ||
| 124 | |||
| 125 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 126 | SetRegister(bb, instr.gpr0, value); | ||
| 127 | break; | ||
| 128 | } | ||
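
The uint33 TODO above can be made concrete: if the envytools description is right, the intermediate a + b can carry into bit 32, and a 32-bit add drops that bit before the RightShift. A small illustration of the divergence:

    #include <cstdint>

    // Current IR behavior: the carry out of a + b is lost.
    uint32_t Iadd3RightShift32(uint32_t a, uint32_t b, uint32_t c) {
        return ((a + b) >> 16) + c;
    }

    // Widening to 64 bits keeps the 33rd bit, as the hardware reportedly does.
    uint32_t Iadd3RightShift33(uint32_t a, uint32_t b, uint32_t c) {
        const uint64_t wide = uint64_t(a) + b;
        return uint32_t(wide >> 16) + c;
    }

    // Example divergence: a = b = 0x80000000 gives 0x0 + c in the 32-bit
    // version but 0x10000 + c with the 33-bit add.
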
| 129 | case OpCode::Id::ISCADD_C: | ||
| 130 | case OpCode::Id::ISCADD_R: | ||
| 131 | case OpCode::Id::ISCADD_IMM: { | ||
| 132 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 133 | "Condition codes generation in ISCADD is not implemented"); | ||
| 134 | |||
| 135 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 136 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 137 | |||
| 138 | const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); | ||
| 139 | const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); | ||
| 140 | const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); | ||
| 141 | |||
| 142 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 143 | SetRegister(bb, instr.gpr0, value); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | case OpCode::Id::POPC_C: | ||
| 147 | case OpCode::Id::POPC_R: | ||
| 148 | case OpCode::Id::POPC_IMM: { | ||
| 149 | if (instr.popc.invert) { | ||
| 150 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 151 | } | ||
| 152 | const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); | ||
| 153 | SetRegister(bb, instr.gpr0, value); | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | case OpCode::Id::FLO_R: | ||
| 157 | case OpCode::Id::FLO_C: | ||
| 158 | case OpCode::Id::FLO_IMM: { | ||
| 159 | Node value; | ||
| 160 | if (instr.flo.invert) { | ||
| 161 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); | ||
| 162 | } | ||
| 163 | if (instr.flo.is_signed) { | ||
| 164 | value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); | ||
| 165 | } else { | ||
| 166 | value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); | ||
| 167 | } | ||
| 168 | if (instr.flo.sh) { | ||
| 169 | value = | ||
| 170 | Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); | ||
| 171 | } | ||
| 172 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 173 | break; | ||
| 174 | } | ||
| 175 | case OpCode::Id::SEL_C: | ||
| 176 | case OpCode::Id::SEL_R: | ||
| 177 | case OpCode::Id::SEL_IMM: { | ||
| 178 | const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); | ||
| 179 | const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); | ||
| 180 | SetRegister(bb, instr.gpr0, value); | ||
| 181 | break; | ||
| 182 | } | ||
| 183 | case OpCode::Id::ICMP_CR: | ||
| 184 | case OpCode::Id::ICMP_R: | ||
| 185 | case OpCode::Id::ICMP_RC: | ||
| 186 | case OpCode::Id::ICMP_IMM: { | ||
| 187 | const Node zero = Immediate(0); | ||
| 188 | |||
| 189 | const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { | ||
| 190 | switch (opcode->get().GetId()) { | ||
| 191 | case OpCode::Id::ICMP_CR: | ||
| 192 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 193 | GetRegister(instr.gpr39)}; | ||
| 194 | case OpCode::Id::ICMP_R: | ||
| 195 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 196 | case OpCode::Id::ICMP_RC: | ||
| 197 | return {GetRegister(instr.gpr39), | ||
| 198 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 199 | case OpCode::Id::ICMP_IMM: | ||
| 200 | return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; | ||
| 201 | default: | ||
| 202 | UNREACHABLE(); | ||
| 203 | return {zero, zero}; | ||
| 204 | } | ||
| 205 | }(); | ||
| 206 | const Node op_lhs = GetRegister(instr.gpr8); | ||
| 207 | const Node comparison = | ||
| 208 | GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); | ||
| 209 | SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | case OpCode::Id::LOP_C: | ||
| 213 | case OpCode::Id::LOP_R: | ||
| 214 | case OpCode::Id::LOP_IMM: { | ||
| 215 | if (instr.alu.lop.invert_a) | ||
| 216 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); | ||
| 217 | if (instr.alu.lop.invert_b) | ||
| 218 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 219 | |||
| 220 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, | ||
| 221 | instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, | ||
| 222 | instr.generates_cc); | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | case OpCode::Id::LOP3_C: | ||
| 226 | case OpCode::Id::LOP3_R: | ||
| 227 | case OpCode::Id::LOP3_IMM: { | ||
| 228 | const Node op_c = GetRegister(instr.gpr39); | ||
| 229 | const Node lut = [&]() { | ||
| 230 | if (opcode->get().GetId() == OpCode::Id::LOP3_R) { | ||
| 231 | return Immediate(instr.alu.lop3.GetImmLut28()); | ||
| 232 | } else { | ||
| 233 | return Immediate(instr.alu.lop3.GetImmLut48()); | ||
| 234 | } | ||
| 235 | }(); | ||
| 236 | |||
| 237 | WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | case OpCode::Id::IMNMX_C: | ||
| 241 | case OpCode::Id::IMNMX_R: | ||
| 242 | case OpCode::Id::IMNMX_IMM: { | ||
| 243 | UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); | ||
| 244 | |||
| 245 | const bool is_signed = instr.imnmx.is_signed; | ||
| 246 | |||
| 247 | const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); | ||
| 248 | const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); | ||
| 249 | const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); | ||
| 250 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 251 | |||
| 252 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 253 | SetRegister(bb, instr.gpr0, value); | ||
| 254 | break; | ||
| 255 | } | ||
| 256 | case OpCode::Id::LEA_R2: | ||
| 257 | case OpCode::Id::LEA_R1: | ||
| 258 | case OpCode::Id::LEA_IMM: | ||
| 259 | case OpCode::Id::LEA_RZ: | ||
| 260 | case OpCode::Id::LEA_HI: { | ||
| 261 | auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { | ||
| 262 | switch (opcode->get().GetId()) { | ||
| 263 | case OpCode::Id::LEA_R2: { | ||
| 264 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), | ||
| 265 | Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; | ||
| 266 | } | ||
| 267 | case OpCode::Id::LEA_R1: { | ||
| 268 | const bool neg = instr.lea.r1.neg != 0; | ||
| 269 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 270 | GetRegister(instr.gpr20), | ||
| 271 | Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; | ||
| 272 | } | ||
| 273 | case OpCode::Id::LEA_IMM: { | ||
| 274 | const bool neg = instr.lea.imm.neg != 0; | ||
| 275 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 276 | Immediate(static_cast<u32>(instr.lea.imm.entry_a)), | ||
| 277 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 278 | } | ||
| 279 | case OpCode::Id::LEA_RZ: { | ||
| 280 | const bool neg = instr.lea.rz.neg != 0; | ||
| 281 | return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), | ||
| 282 | GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 283 | Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; | ||
| 284 | } | ||
| 285 | case OpCode::Id::LEA_HI: | ||
| 286 | default: | ||
| 287 | UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); | ||
| 288 | |||
| 289 | return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), | ||
| 290 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 291 | } | ||
| 292 | }(); | ||
| 293 | |||
| 294 | UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), | ||
| 295 | "Unhandled LEA Predicate"); | ||
| 296 | |||
| 297 | Node value = | ||
| 298 | Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); | ||
| 299 | value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); | ||
| 300 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 301 | |||
| 302 | break; | ||
| 303 | } | ||
| 304 | default: | ||
| 305 | UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); | ||
| 306 | } | ||
| 307 | |||
| 308 | return pc; | ||
| 309 | } | ||
| 310 | |||
| 311 | void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, | ||
| 312 | Node imm_lut, bool sets_cc) { | ||
| 313 | const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { | ||
| 314 | Node value = Immediate(0); | ||
| 315 | const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); | ||
| 316 | if (imm.GetValue() & 0x01) { | ||
| 317 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 318 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 319 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 320 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | ||
| 321 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 322 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 323 | } | ||
| 324 | if (imm.GetValue() & 0x02) { | ||
| 325 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 326 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 327 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | ||
| 328 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 329 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 330 | } | ||
| 331 | if (imm.GetValue() & 0x04) { | ||
| 332 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 333 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 334 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 335 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 336 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 337 | } | ||
| 338 | if (imm.GetValue() & 0x08) { | ||
| 339 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 340 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 341 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 342 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 343 | } | ||
| 344 | if (imm.GetValue() & 0x10) { | ||
| 345 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 346 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 347 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 348 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 349 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 350 | } | ||
| 351 | if (imm.GetValue() & 0x20) { | ||
| 352 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 353 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 354 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 355 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 356 | } | ||
| 357 | if (imm.GetValue() & 0x40) { | ||
| 358 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 359 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 360 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 361 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 362 | } | ||
| 363 | if (imm.GetValue() & 0x80) { | ||
| 364 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 365 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 366 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 367 | } | ||
| 368 | return value; | ||
| 369 | }(op_a, op_b, op_c, imm_lut); | ||
| 370 | |||
| 371 | SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); | ||
| 372 | SetRegister(bb, dest, lop3_fast); | ||
| 373 | } | ||
| 374 | |||
| 375 | } // namespace VideoCommon::Shader | ||
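
WriteLop3Instruction above expands the 8-bit imm_lut as a sum of minterms: LUT bit 0x01 is the !a & !b & !c term, through 0x80 for a & b & c. Equivalently, every output bit looks up the LUT entry indexed by the matching input bits, as in this scalar sketch (hypothetical helper):

    #include <cstdint>

    // Bitwise reference for the LOP3 lowering: for every bit position, the
    // output bit is the LUT entry selected by the a, b and c bits.
    uint32_t Lop3(uint32_t a, uint32_t b, uint32_t c, uint8_t lut) {
        uint32_t result = 0;
        for (int bit = 0; bit < 32; ++bit) {
            const uint32_t index = ((a >> bit) & 1) << 2 | ((b >> bit) & 1) << 1 |
                                   ((c >> bit) & 1);
            result |= ((lut >> index) & 1u) << bit;
        }
        return result;
    }

    // E.g. lut = 0xE0 computes a & (b | c): only indices 0b111, 0b110 and
    // 0b101 are set.
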
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
deleted file mode 100644
index 73580277a..000000000
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::LogicOperation; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::PredicateResultMode; | ||
| 18 | using Tegra::Shader::Register; | ||
| 19 | |||
| 20 | u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { | ||
| 21 | const Instruction instr = {program_code[pc]}; | ||
| 22 | const auto opcode = OpCode::Decode(instr); | ||
| 23 | |||
| 24 | Node op_a = GetRegister(instr.gpr8); | ||
| 25 | Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); | ||
| 26 | |||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::IADD32I: { | ||
| 29 | UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); | ||
| 30 | |||
| 31 | op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); | ||
| 32 | |||
| 33 | Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 34 | |||
| 35 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); | ||
| 36 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | case OpCode::Id::LOP32I: { | ||
| 40 | if (instr.alu.lop32i.invert_a) { | ||
| 41 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); | ||
| 42 | } | ||
| 43 | |||
| 44 | if (instr.alu.lop32i.invert_b) { | ||
| 45 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); | ||
| 46 | } | ||
| 47 | |||
| 48 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), | ||
| 49 | std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, | ||
| 50 | instr.op_32.generates_cc != 0); | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | default: | ||
| 54 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 55 | opcode->get().GetName()); | ||
| 56 | } | ||
| 57 | |||
| 58 | return pc; | ||
| 59 | } | ||
| 60 | |||
| 61 | void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, | ||
| 62 | Node op_b, PredicateResultMode predicate_mode, Pred predicate, | ||
| 63 | bool sets_cc) { | ||
| 64 | Node result = [&] { | ||
| 65 | switch (logic_op) { | ||
| 66 | case LogicOperation::And: | ||
| 67 | return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 68 | case LogicOperation::Or: | ||
| 69 | return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 70 | case LogicOperation::Xor: | ||
| 71 | return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 72 | case LogicOperation::PassB: | ||
| 73 | return op_b; | ||
| 74 | default: | ||
| 75 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); | ||
| 76 | return Immediate(0); | ||
| 77 | } | ||
| 78 | }(); | ||
| 79 | |||
| 80 | SetInternalFlagsFromInteger(bb, result, sets_cc); | ||
| 81 | SetRegister(bb, dest, result); | ||
| 82 | |||
| 83 | // Write the predicate value depending on the predicate mode. | ||
| 84 | switch (predicate_mode) { | ||
| 85 | case PredicateResultMode::None: | ||
| 86 | // Do nothing. | ||
| 87 | return; | ||
| 88 | case PredicateResultMode::NotZero: { | ||
| 89 | // Set the predicate to true if the result is not zero. | ||
| 90 | Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); | ||
| 91 | SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | default: | ||
| 95 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
deleted file mode 100644
index 8e3b46e8e..000000000
--- a/src/video_core/shader/decode/bfe.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | Node op_a = GetRegister(instr.gpr8); | ||
| 21 | Node op_b = [&] { | ||
| 22 | switch (opcode->get().GetId()) { | ||
| 23 | case OpCode::Id::BFE_R: | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | case OpCode::Id::BFE_C: | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 27 | case OpCode::Id::BFE_IMM: | ||
| 28 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 29 | default: | ||
| 30 | UNREACHABLE(); | ||
| 31 | return Immediate(0); | ||
| 32 | } | ||
| 33 | }(); | ||
| 34 | |||
| 35 | UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); | ||
| 36 | |||
| 37 | const bool is_signed = instr.bfe.is_signed; | ||
| 38 | |||
| 39 | // Uses the reverse parallel method from | ||
| 40 | // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel | ||
| 41 | // Note for later: a faster method may be possible. | ||
| 42 | if (instr.bfe.brev) { | ||
| 43 | const auto swap = [&](u32 s, u32 mask) { | ||
| 44 | Node v1 = | ||
| 45 | SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); | ||
| 46 | if (mask != 0) { | ||
| 47 | v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), | ||
| 48 | Immediate(mask)); | ||
| 49 | } | ||
| 50 | Node v2 = op_a; | ||
| 51 | if (mask != 0) { | ||
| 52 | v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), | ||
| 53 | Immediate(mask)); | ||
| 54 | } | ||
| 55 | v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), | ||
| 56 | Immediate(s)); | ||
| 57 | return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), | ||
| 58 | std::move(v2)); | ||
| 59 | }; | ||
| 60 | op_a = swap(1, 0x55555555U); | ||
| 61 | op_a = swap(2, 0x33333333U); | ||
| 62 | op_a = swap(4, 0x0F0F0F0FU); | ||
| 63 | op_a = swap(8, 0x00FF00FFU); | ||
| 64 | op_a = swap(16, 0); | ||
| 65 | } | ||
| 66 | |||
| 67 | const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 68 | Immediate(0), Immediate(8)); | ||
| 69 | const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 70 | Immediate(8), Immediate(8)); | ||
| 71 | auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); | ||
| 72 | SetRegister(bb, instr.gpr0, std::move(result)); | ||
| 73 | |||
| 74 | return pc; | ||
| 75 | } | ||
| 76 | |||
| 77 | } // namespace VideoCommon::Shader | ||
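
The five swap stages emitted for BFE.BREV above are the linked reverse parallel method; in scalar form they read as below. The decoder then splits op_b into a byte-encoded extract offset (bits 0:7) and width (bits 8:15) before the IBitfieldExtract.

    #include <cstdint>

    // Classic reverse parallel bit reversal: swap odd/even bits, then 2-bit
    // pairs, nibbles, bytes, and finally the two half-words.
    uint32_t ReverseBits(uint32_t v) {
        v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
        v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
        v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);
        v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);
        v = (v >> 16) | (v << 16); // the mask-free swap(16, 0) stage
        return v;
    }
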
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
deleted file mode 100644
index 70d1c055b..000000000
--- a/src/video_core/shader/decode/bfi.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { | ||
| 21 | switch (opcode->get().GetId()) { | ||
| 22 | case OpCode::Id::BFI_RC: | ||
| 23 | return {GetRegister(instr.gpr39), | ||
| 24 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 25 | case OpCode::Id::BFI_IMM_R: | ||
| 26 | return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; | ||
| 27 | default: | ||
| 28 | UNREACHABLE(); | ||
| 29 | return {Immediate(0), Immediate(0)}; | ||
| 30 | } | ||
| 31 | }(); | ||
| 32 | const Node insert = GetRegister(instr.gpr8); | ||
| 33 | const Node offset = BitfieldExtract(packed_shift, 0, 8); | ||
| 34 | const Node bits = BitfieldExtract(packed_shift, 8, 8); | ||
| 35 | |||
| 36 | const Node value = | ||
| 37 | Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); | ||
| 38 | |||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | SetRegister(bb, instr.gpr0, value); | ||
| 41 | |||
| 42 | return pc; | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace VideoCommon::Shader | ||
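
A scalar reference for the BFI lowering above (hypothetical helper): packed_shift carries the field offset in its low byte and the field width in the next byte, and the insert value replaces that field inside base.

    #include <cstdint>

    // Assumes offset + bits <= 32, as GLSL's bitfieldInsert also requires.
    uint32_t BitfieldInsert(uint32_t base, uint32_t insert, uint32_t packed_shift) {
        const uint32_t offset = packed_shift & 0xFFu;
        const uint32_t bits = (packed_shift >> 8) & 0xFFu;
        if (bits == 0) {
            return base;
        }
        const uint32_t field = bits >= 32 ? ~0u : ((1u << bits) - 1u);
        const uint32_t mask = field << offset;
        return (base & ~mask) | ((insert << offset) & mask);
    }
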
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
deleted file mode 100644
index fea7a54df..000000000
--- a/src/video_core/shader/decode/conversion.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | #include <optional> | ||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/node_helper.h" | ||
| 13 | #include "video_core/shader/shader_ir.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::Register; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | constexpr OperationCode GetFloatSelector(u64 selector) { | ||
| 24 | return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; | ||
| 25 | } | ||
| 26 | |||
| 27 | constexpr u32 SizeInBits(Register::Size size) { | ||
| 28 | switch (size) { | ||
| 29 | case Register::Size::Byte: | ||
| 30 | return 8; | ||
| 31 | case Register::Size::Short: | ||
| 32 | return 16; | ||
| 33 | case Register::Size::Word: | ||
| 34 | return 32; | ||
| 35 | case Register::Size::Long: | ||
| 36 | return 64; | ||
| 37 | } | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | |||
| 41 | constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, | ||
| 42 | Register::Size dst_size, | ||
| 43 | bool src_signed, | ||
| 44 | bool dst_signed) { | ||
| 45 | const u32 dst_bits = SizeInBits(dst_size); | ||
| 46 | if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { | ||
| 47 | if (src_signed == dst_signed) { | ||
| 48 | return std::nullopt; | ||
| 49 | } | ||
| 50 | return std::make_pair(0, std::numeric_limits<s32>::max()); | ||
| 51 | } | ||
| 52 | if (dst_signed) { | ||
| 53 | // Signed destination, clamp to [-128, 127] for instance | ||
| 54 | return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); | ||
| 55 | } else { | ||
| 56 | // Unsigned destination | ||
| 57 | if (dst_bits == 32) { | ||
| 58 | // Avoid shifting by 32, that is undefined behavior | ||
| 59 | return std::make_pair(0, s32(std::numeric_limits<u32>::max())); | ||
| 60 | } | ||
| 61 | return std::make_pair(0, (1 << dst_bits) - 1); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | } // Anonymous namespace | ||
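
IntegerSaturateBounds drives the I2I.SAT clamp built later in this file (an IMax followed by an IMin). Some bounds worked by hand, plus the equivalent scalar clamp as a sketch:

    #include <algorithm>
    #include <cstdint>

    // Worked bounds from IntegerSaturateBounds:
    //   dst s8  -> [-128, 127]        dst u8  -> [0, 255]
    //   dst s16 -> [-32768, 32767]    dst u16 -> [0, 65535]
    //   s32 <-> u32 (signedness change only) -> [0, INT32_MAX]
    //   s32 -> s32 / u32 -> u32 -> no clamping (nullopt)
    int32_t SaturateTo(int32_t value, int32_t min, int32_t max) {
        return std::clamp(value, min, max);
    }
    // e.g. SaturateTo(300, -128, 127) == 127, SaturateTo(-5, 0, 255) == 0
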
| 66 | |||
| 67 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | ||
| 68 | const Instruction instr = {program_code[pc]}; | ||
| 69 | const auto opcode = OpCode::Decode(instr); | ||
| 70 | |||
| 71 | switch (opcode->get().GetId()) { | ||
| 72 | case OpCode::Id::I2I_R: | ||
| 73 | case OpCode::Id::I2I_C: | ||
| 74 | case OpCode::Id::I2I_IMM: { | ||
| 75 | const bool src_signed = instr.conversion.is_input_signed; | ||
| 76 | const bool dst_signed = instr.conversion.is_output_signed; | ||
| 77 | const Register::Size src_size = instr.conversion.src_size; | ||
| 78 | const Register::Size dst_size = instr.conversion.dst_size; | ||
| 79 | const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); | ||
| 80 | |||
| 81 | Node value = [this, instr, opcode] { | ||
| 82 | switch (opcode->get().GetId()) { | ||
| 83 | case OpCode::Id::I2I_R: | ||
| 84 | return GetRegister(instr.gpr20); | ||
| 85 | case OpCode::Id::I2I_C: | ||
| 86 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 87 | case OpCode::Id::I2I_IMM: | ||
| 88 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 89 | default: | ||
| 90 | UNREACHABLE(); | ||
| 91 | return Immediate(0); | ||
| 92 | } | ||
| 93 | }(); | ||
| 94 | |||
| 95 | // Ensure the source selector is valid | ||
| 96 | switch (instr.conversion.src_size) { | ||
| 97 | case Register::Size::Byte: | ||
| 98 | break; | ||
| 99 | case Register::Size::Short: | ||
| 100 | ASSERT(selector == 0 || selector == 2); | ||
| 101 | break; | ||
| 102 | default: | ||
| 103 | ASSERT(selector == 0); | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | |||
| 107 | if (src_size != Register::Size::Word || selector != 0) { | ||
| 108 | value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), | ||
| 109 | Immediate(selector * 8), Immediate(SizeInBits(src_size))); | ||
| 110 | } | ||
| 111 | |||
| 112 | value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, | ||
| 113 | instr.conversion.negate_a, src_signed); | ||
| 114 | |||
| 115 | if (instr.alu.saturate_d) { | ||
| 116 | if (src_signed && !dst_signed) { | ||
| 117 | Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, | ||
| 118 | Immediate(1 << (SizeInBits(src_size) - 1))); | ||
| 119 | value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), | ||
| 120 | std::move(value)); | ||
| 121 | |||
| 122 | // Simplify generated expressions, this can be removed without semantic impact | ||
| 123 | SetTemporary(bb, 0, std::move(value)); | ||
| 124 | value = GetTemporary(0); | ||
| 125 | |||
| 126 | if (dst_size != Register::Size::Word) { | ||
| 127 | const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 128 | Node is_large = | ||
| 129 | Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); | ||
| 130 | value = Operation(OperationCode::Select, std::move(is_large), limit, | ||
| 131 | std::move(value)); | ||
| 132 | } | ||
| 133 | } else if (const std::optional bounds = | ||
| 134 | IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { | ||
| 135 | value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), | ||
| 136 | Immediate(bounds->first)); | ||
| 137 | value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), | ||
| 138 | Immediate(bounds->second)); | ||
| 139 | } | ||
| 140 | } else if (dst_size != Register::Size::Word) { | ||
| 141 | // No saturation, we only have to mask the result | ||
| 142 | Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 143 | value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); | ||
| 144 | } | ||
| 145 | |||
| 146 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 147 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | case OpCode::Id::I2F_R: | ||
| 151 | case OpCode::Id::I2F_C: | ||
| 152 | case OpCode::Id::I2F_IMM: { | ||
| 153 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||
| 154 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 155 | "Condition codes generation in I2F is not implemented"); | ||
| 156 | |||
| 157 | Node value = [&] { | ||
| 158 | switch (opcode->get().GetId()) { | ||
| 159 | case OpCode::Id::I2F_R: | ||
| 160 | return GetRegister(instr.gpr20); | ||
| 161 | case OpCode::Id::I2F_C: | ||
| 162 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 163 | case OpCode::Id::I2F_IMM: | ||
| 164 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 165 | default: | ||
| 166 | UNREACHABLE(); | ||
| 167 | return Immediate(0); | ||
| 168 | } | ||
| 169 | }(); | ||
| 170 | |||
| 171 | const bool input_signed = instr.conversion.is_input_signed; | ||
| 172 | |||
| 173 | if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { | ||
| 174 | ASSERT(instr.conversion.src_size == Register::Size::Byte || | ||
| 175 | instr.conversion.src_size == Register::Size::Short); | ||
| 176 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 177 | ASSERT(offset == 0 || offset == 2); | ||
| 178 | } | ||
| 179 | value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, | ||
| 180 | std::move(value), Immediate(offset * 8)); | ||
| 181 | } | ||
| 182 | |||
| 183 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | ||
| 184 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); | ||
| 185 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); | ||
| 186 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | ||
| 187 | |||
| 188 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 189 | |||
| 190 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 191 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | SetRegister(bb, instr.gpr0, value); | ||
| 195 | break; | ||
| 196 | } | ||
| 197 | case OpCode::Id::F2F_R: | ||
| 198 | case OpCode::Id::F2F_C: | ||
| 199 | case OpCode::Id::F2F_IMM: { | ||
| 200 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||
| 201 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||
| 202 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 203 | "Condition codes generation in F2F is not implemented"); | ||
| 204 | |||
| 205 | Node value = [&]() { | ||
| 206 | switch (opcode->get().GetId()) { | ||
| 207 | case OpCode::Id::F2F_R: | ||
| 208 | return GetRegister(instr.gpr20); | ||
| 209 | case OpCode::Id::F2F_C: | ||
| 210 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 211 | case OpCode::Id::F2F_IMM: | ||
| 212 | return GetImmediate19(instr); | ||
| 213 | default: | ||
| 214 | UNREACHABLE(); | ||
| 215 | return Immediate(0); | ||
| 216 | } | ||
| 217 | }(); | ||
| 218 | |||
| 219 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 220 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 221 | std::move(value)); | ||
| 222 | } else { | ||
| 223 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 224 | } | ||
| 225 | |||
| 226 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 227 | |||
| 228 | value = [&] { | ||
| 229 | if (instr.conversion.src_size != instr.conversion.dst_size) { | ||
| 230 | // Rounding operations only matter when the source and destination conversion size | ||
| 231 | // is the same. | ||
| 232 | return value; | ||
| 233 | } | ||
| 234 | switch (instr.conversion.f2f.GetRoundingMode()) { | ||
| 235 | case Tegra::Shader::F2fRoundingOp::None: | ||
| 236 | return value; | ||
| 237 | case Tegra::Shader::F2fRoundingOp::Round: | ||
| 238 | return Operation(OperationCode::FRoundEven, value); | ||
| 239 | case Tegra::Shader::F2fRoundingOp::Floor: | ||
| 240 | return Operation(OperationCode::FFloor, value); | ||
| 241 | case Tegra::Shader::F2fRoundingOp::Ceil: | ||
| 242 | return Operation(OperationCode::FCeil, value); | ||
| 243 | case Tegra::Shader::F2fRoundingOp::Trunc: | ||
| 244 | return Operation(OperationCode::FTrunc, value); | ||
| 245 | default: | ||
| 246 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||
| 247 | instr.conversion.f2f.rounding.Value()); | ||
| 248 | return value; | ||
| 249 | } | ||
| 250 | }(); | ||
| 251 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 252 | |||
| 253 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 254 | |||
| 255 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 256 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 257 | } | ||
| 258 | |||
| 259 | SetRegister(bb, instr.gpr0, value); | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | case OpCode::Id::F2I_R: | ||
| 263 | case OpCode::Id::F2I_C: | ||
| 264 | case OpCode::Id::F2I_IMM: { | ||
| 265 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||
| 266 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 267 | "Condition codes generation in F2I is not implemented"); | ||
| 268 | Node value = [&]() { | ||
| 269 | switch (opcode->get().GetId()) { | ||
| 270 | case OpCode::Id::F2I_R: | ||
| 271 | return GetRegister(instr.gpr20); | ||
| 272 | case OpCode::Id::F2I_C: | ||
| 273 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 274 | case OpCode::Id::F2I_IMM: | ||
| 275 | return GetImmediate19(instr); | ||
| 276 | default: | ||
| 277 | UNREACHABLE(); | ||
| 278 | return Immediate(0); | ||
| 279 | } | ||
| 280 | }(); | ||
| 281 | |||
| 282 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 283 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 284 | std::move(value)); | ||
| 285 | } else { | ||
| 286 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 287 | } | ||
| 288 | |||
| 289 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 290 | |||
| 291 | value = [&]() { | ||
| 292 | switch (instr.conversion.f2i.rounding) { | ||
| 293 | case Tegra::Shader::F2iRoundingOp::RoundEven: | ||
| 294 | return Operation(OperationCode::FRoundEven, PRECISE, value); | ||
| 295 | case Tegra::Shader::F2iRoundingOp::Floor: | ||
| 296 | return Operation(OperationCode::FFloor, PRECISE, value); | ||
| 297 | case Tegra::Shader::F2iRoundingOp::Ceil: | ||
| 298 | return Operation(OperationCode::FCeil, PRECISE, value); | ||
| 299 | case Tegra::Shader::F2iRoundingOp::Trunc: | ||
| 300 | return Operation(OperationCode::FTrunc, PRECISE, value); | ||
| 301 | default: | ||
| 302 | UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", | ||
| 303 | instr.conversion.f2i.rounding.Value()); | ||
| 304 | return Immediate(0); | ||
| 305 | } | ||
| 306 | }(); | ||
| 307 | const bool is_signed = instr.conversion.is_output_signed; | ||
| 308 | value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); | ||
| 309 | value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); | ||
| 310 | |||
| 311 | SetRegister(bb, instr.gpr0, value); | ||
| 312 | break; | ||
| 313 | } | ||
| 314 | default: | ||
| 315 | UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); | ||
| 316 | } | ||
| 317 | |||
| 318 | return pc; | ||
| 319 | } | ||
| 320 | |||
| 321 | } // namespace VideoCommon::Shader | ||
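
The F2I rounding selection above has a direct host analogue; a sketch with a local enum standing in for Tegra::Shader::F2iRoundingOp, assuming the default FE_TONEAREST rounding environment:

    #include <cmath>

    enum class Rounding { RoundEven, Floor, Ceil, Trunc };

    // Host reference for the rounding step of F2I. std::nearbyint gives
    // round-to-nearest-even under FE_TONEAREST, matching FRoundEven.
    float ApplyF2iRounding(Rounding mode, float v) {
        switch (mode) {
        case Rounding::RoundEven: return std::nearbyint(v);
        case Rounding::Floor:     return std::floor(v);
        case Rounding::Ceil:      return std::ceil(v);
        case Rounding::Trunc:     return std::trunc(v);
        }
        return 0.0f;
    }
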
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
deleted file mode 100644
index 5973588d6..000000000
--- a/src/video_core/shader/decode/ffma.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 21 | if (instr.ffma.tab5980_0 != 1) { | ||
| 22 | LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); | ||
| 23 | } | ||
| 24 | if (instr.ffma.tab5980_1 != 0) { | ||
| 25 | LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); | ||
| 26 | } | ||
| 27 | |||
| 28 | const Node op_a = GetRegister(instr.gpr8); | ||
| 29 | |||
| 30 | auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { | ||
| 31 | switch (opcode->get().GetId()) { | ||
| 32 | case OpCode::Id::FFMA_CR: { | ||
| 33 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 34 | GetRegister(instr.gpr39)}; | ||
| 35 | } | ||
| 36 | case OpCode::Id::FFMA_RR: | ||
| 37 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 38 | case OpCode::Id::FFMA_RC: { | ||
| 39 | return {GetRegister(instr.gpr39), | ||
| 40 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 41 | } | ||
| 42 | case OpCode::Id::FFMA_IMM: | ||
| 43 | return {GetImmediate19(instr), GetRegister(instr.gpr39)}; | ||
| 44 | default: | ||
| 45 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 46 | return {Immediate(0), Immediate(0)}; | ||
| 47 | } | ||
| 48 | }(); | ||
| 49 | |||
| 50 | op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); | ||
| 51 | op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); | ||
| 52 | |||
| 53 | Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); | ||
| 54 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 55 | |||
| 56 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 57 | SetRegister(bb, instr.gpr0, value); | ||
| 58 | |||
| 59 | return pc; | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace VideoCommon::Shader | ||
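
FFMA above lowers to a single fused multiply-add node with optional negation of b and c, followed by optional saturation. The host-side semantics, sketched (helper hypothetical):

    #include <cmath>

    // std::fma is fused (one rounding step), matching the single FFma node.
    // Saturation, when requested, clamps to [0, 1] as GetSaturatedFloat does.
    float FfmaReference(float a, float b, float c, bool neg_b, bool neg_c,
                        bool saturate) {
        const float result = std::fma(a, neg_b ? -b : b, neg_c ? -c : c);
        return saturate ? std::fmin(std::fmax(result, 0.0f), 1.0f) : result;
    }
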
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
deleted file mode 100644
index 5614e8a0d..000000000
--- a/src/video_core/shader/decode/float_set.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | |||
| 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | ||
| 20 | instr.fset.neg_a != 0); | ||
| 21 | |||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 33 | |||
| 34 | // The fset instruction writes 1.0f (when the bf bit is set) or integer -1 (when it is | ||
| 35 | // clear) to the destination register if the condition holds, and 0 otherwise. | ||
| 36 | const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 37 | |||
| 38 | const OperationCode combiner = GetPredicateCombiner(instr.fset.op); | ||
| 39 | const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); | ||
| 40 | |||
| 41 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 42 | |||
| 43 | const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 44 | const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 45 | const Node value = | ||
| 46 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 47 | |||
| 48 | if (instr.fset.bf) { | ||
| 49 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 50 | } else { | ||
| 51 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 52 | } | ||
| 53 | SetRegister(bb, instr.gpr0, value); | ||
| 54 | |||
| 55 | return pc; | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace VideoCommon::Shader | ||
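The bf ("boolean float") encoding above is worth spelling out: the same 32-bit destination register holds either a float or an all-ones integer depending on the bit. A hedged sketch of the result encoding, assuming the semantics described in the removed file's comment:

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>

// FSET result for a passing condition: float 1.0 when bf is set,
// integer -1 (all bits set) when bf is clear; 0 either way on failure.
uint32_t FsetResult(bool condition, bool bf) {
    if (!condition) {
        return 0;
    }
    return bf ? std::bit_cast<uint32_t>(1.0f) // 0x3F800000
              : 0xFFFFFFFFu;                  // integer -1
}

int main() {
    std::printf("bf=1: 0x%08X  bf=0: 0x%08X\n", FsetResult(true, true), FsetResult(true, false));
}
```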
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null | |||
| @@ -1,57 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | |||
| 20 | Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, | ||
| 21 | instr.fsetp.neg_a != 0); | ||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); | ||
| 32 | |||
| 33 | // The constant predicate cannot be used as a destination. | ||
| 34 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 35 | |||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); | ||
| 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | ||
| 39 | |||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | |||
| 43 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 44 | SetPredicate(bb, instr.fsetp.pred3, value); | ||
| 45 | |||
| 46 | if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 47 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 48 | // if enabled | ||
| 49 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 50 | const Node second_value = Operation(combiner, negated_pred, second_pred); | ||
| 51 | SetPredicate(bb, instr.fsetp.pred0, second_value); | ||
| 52 | } | ||
| 53 | |||
| 54 | return pc; | ||
| 55 | } | ||
| 56 | |||
| 57 | } // namespace VideoCommon::Shader | ||
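FSETP's distinguishing feature is the dual write: the primary predicate gets the combined comparison, and an optional secondary predicate gets the combination of the *negated* comparison. A minimal model, assuming the combiner is one of AND/OR/XOR as selected by instr.fsetp.op:

```cpp
#include <cstdio>
#include <functional>

// pred3 = cmp OP p39; pred0 = !cmp OP p39 (the secondary write is optional
// in hardware, gated on pred0 != UnusedIndex in the removed file).
void EmulateFsetp(bool comparison, bool second_pred,
                  const std::function<bool(bool, bool)>& combiner,
                  bool& pred3, bool& pred0) {
    pred3 = combiner(comparison, second_pred);
    pred0 = combiner(!comparison, second_pred);
}

int main() {
    bool p3 = false;
    bool p0 = false;
    EmulateFsetp(true, true, [](bool a, bool b) { return a && b; }, p3, p0);
    std::printf("pred3=%d pred0=%d\n", p3, p0); // pred3=1 pred0=0
}
```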
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "common/logging/log.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/shader/node_helper.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using std::move; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::PredCondition; | ||
| 20 | |||
| 21 | u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | ||
| 22 | const Instruction instr = {program_code[pc]}; | ||
| 23 | const auto opcode = OpCode::Decode(instr); | ||
| 24 | |||
| 25 | PredCondition cond{}; | ||
| 26 | bool bf = false; | ||
| 27 | bool ftz = false; | ||
| 28 | bool neg_a = false; | ||
| 29 | bool abs_a = false; | ||
| 30 | bool neg_b = false; | ||
| 31 | bool abs_b = false; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSET2_C: | ||
| 34 | case OpCode::Id::HSET2_IMM: | ||
| 35 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 36 | bf = instr.Bit(53); | ||
| 37 | ftz = instr.Bit(54); | ||
| 38 | neg_a = instr.Bit(43); | ||
| 39 | abs_a = instr.Bit(44); | ||
| 40 | neg_b = instr.Bit(56); | ||
| 41 | abs_b = instr.Bit(54); | ||
| 42 | break; | ||
| 43 | case OpCode::Id::HSET2_R: | ||
| 44 | cond = instr.hsetp2.reg.cond; | ||
| 45 | bf = instr.Bit(49); | ||
| 46 | ftz = instr.Bit(50); | ||
| 47 | neg_a = instr.Bit(43); | ||
| 48 | abs_a = instr.Bit(44); | ||
| 49 | neg_b = instr.Bit(31); | ||
| 50 | abs_b = instr.Bit(30); | ||
| 51 | break; | ||
| 52 | default: | ||
| 53 | UNREACHABLE(); | ||
| 54 | } | ||
| 55 | |||
| 56 | Node op_b = [this, instr, opcode] { | ||
| 57 | switch (opcode->get().GetId()) { | ||
| 58 | case OpCode::Id::HSET2_C: | ||
| 59 | // Report as unimplemented, since this path is untested. | ||
| 60 | UNIMPLEMENTED_MSG("HSET2_C is not implemented"); | ||
| 61 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 62 | case OpCode::Id::HSET2_R: | ||
| 63 | return GetRegister(instr.gpr20); | ||
| 64 | case OpCode::Id::HSET2_IMM: | ||
| 65 | return UnpackHalfImmediate(instr, true); | ||
| 66 | default: | ||
| 67 | UNREACHABLE(); | ||
| 68 | return Node{}; | ||
| 69 | } | ||
| 70 | }(); | ||
| 71 | |||
| 72 | if (!ftz) { | ||
| 73 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 74 | } | ||
| 75 | |||
| 76 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | ||
| 77 | op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); | ||
| 78 | |||
| 79 | switch (opcode->get().GetId()) { | ||
| 80 | case OpCode::Id::HSET2_R: | ||
| 81 | op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); | ||
| 82 | [[fallthrough]]; | ||
| 83 | case OpCode::Id::HSET2_C: | ||
| 84 | op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); | ||
| 85 | break; | ||
| 86 | default: | ||
| 87 | break; | ||
| 88 | } | ||
| 89 | |||
| 90 | Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | ||
| 91 | |||
| 92 | Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 93 | |||
| 94 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 95 | |||
| 96 | // HSET2 operates on each half float in the pack. | ||
| 97 | std::array<Node, 2> values; | ||
| 98 | for (u32 i = 0; i < 2; ++i) { | ||
| 99 | const u32 raw_value = bf ? 0x3c00 : 0xffff; | ||
| 100 | Node true_value = Immediate(raw_value << (i * 16)); | ||
| 101 | Node false_value = Immediate(0); | ||
| 102 | |||
| 103 | Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 104 | Node predicate = Operation(combiner, comparison, second_pred); | ||
| 105 | values[i] = | ||
| 106 | Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); | ||
| 107 | } | ||
| 108 | |||
| 109 | Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); | ||
| 110 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 111 | |||
| 112 | return pc; | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
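The per-lane packing at the end of DecodeHalfSet is the subtle part: each 16-bit lane becomes 0x3c00 (half-precision 1.0) when bf is set or 0xffff when it is clear, and the two lanes are OR'd into one 32-bit register. A standalone sketch, not yuzu code:

```cpp
#include <cstdint>
#include <cstdio>

// Pack two HSET2 lane results into one register. 0x3C00 is IEEE 754
// half-precision 1.0; 0xFFFF is the all-ones integer "true" per lane.
uint32_t PackHset2(bool lane0_pass, bool lane1_pass, bool bf) {
    const uint32_t raw = bf ? 0x3C00u : 0xFFFFu;
    uint32_t result = 0;
    if (lane0_pass) {
        result |= raw;       // low 16 bits
    }
    if (lane1_pass) {
        result |= raw << 16; // high 16 bits
    }
    return result;
}

int main() {
    std::printf("0x%08X\n", PackHset2(true, true, true)); // 0x3C003C00
}
```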
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null | |||
| @@ -1,80 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | if (instr.hsetp2.ftz != 0) { | ||
| 23 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 24 | } | ||
| 25 | |||
| 26 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); | ||
| 27 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | ||
| 28 | |||
| 29 | Tegra::Shader::PredCondition cond{}; | ||
| 30 | bool h_and{}; | ||
| 31 | Node op_b{}; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSETP2_C: | ||
| 34 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 35 | h_and = instr.hsetp2.cbuf_and_imm.h_and; | ||
| 36 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 37 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); | ||
| 38 | // F32 is hardcoded in hardware | ||
| 39 | op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); | ||
| 40 | break; | ||
| 41 | case OpCode::Id::HSETP2_IMM: | ||
| 42 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 43 | h_and = instr.hsetp2.cbuf_and_imm.h_and; | ||
| 44 | op_b = UnpackHalfImmediate(instr, true); | ||
| 45 | break; | ||
| 46 | case OpCode::Id::HSETP2_R: | ||
| 47 | cond = instr.hsetp2.reg.cond; | ||
| 48 | h_and = instr.hsetp2.reg.h_and; | ||
| 49 | op_b = | ||
| 50 | GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), | ||
| 51 | instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); | ||
| 52 | break; | ||
| 53 | default: | ||
| 54 | UNREACHABLE(); | ||
| 55 | op_b = Immediate(0); | ||
| 56 | } | ||
| 57 | |||
| 58 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | ||
| 59 | const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); | ||
| 60 | |||
| 61 | const auto Write = [&](u64 dest, Node src) { | ||
| 62 | SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); | ||
| 63 | }; | ||
| 64 | |||
| 65 | const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 66 | const u64 first = instr.hsetp2.pred3; | ||
| 67 | const u64 second = instr.hsetp2.pred0; | ||
| 68 | if (h_and) { | ||
| 69 | Node joined = Operation(OperationCode::LogicalAnd2, comparison); | ||
| 70 | Write(first, joined); | ||
| 71 | Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); | ||
| 72 | } else { | ||
| 73 | Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); | ||
| 74 | Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); | ||
| 75 | } | ||
| 76 | |||
| 77 | return pc; | ||
| 78 | } | ||
| 79 | |||
| 80 | } // namespace VideoCommon::Shader | ||
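HSETP2 addresses the two-lane comparison pair in one of two ways: with h_and set, both lanes are folded into a single boolean (and its negation goes to the second predicate); otherwise each predicate receives one lane. A sketch of that split, assuming LogicalAnd2 folds both lanes, LogicalPick2 selects one, and the predicate combiner resolves to logical AND:

```cpp
#include <cstdio>

void EmulateHsetp2(bool lane0, bool lane1, bool h_and, bool combined_pred,
                   bool& first, bool& second) {
    if (h_and) {
        const bool joined = lane0 && lane1;  // LogicalAnd2 over the pair
        first = joined && combined_pred;
        second = !joined && combined_pred;
    } else {
        first = lane0 && combined_pred;      // LogicalPick2, lane 0
        second = lane1 && combined_pred;     // LogicalPick2, lane 1
    }
}

int main() {
    bool f = false;
    bool s = false;
    EmulateHsetp2(true, false, false, true, f, s);
    std::printf("first=%d second=%d\n", f, s); // first=1 second=0
}
```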
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null | |||
| @@ -1,73 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Shader::HalfPrecision; | ||
| 16 | using Tegra::Shader::HalfType; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | |||
| 20 | u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | ||
| 21 | const Instruction instr = {program_code[pc]}; | ||
| 22 | const auto opcode = OpCode::Decode(instr); | ||
| 23 | |||
| 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | ||
| 25 | DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); | ||
| 26 | } else { | ||
| 27 | DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); | ||
| 28 | } | ||
| 29 | |||
| 30 | constexpr auto identity = HalfType::H0_H1; | ||
| 31 | bool neg_b{}, neg_c{}; | ||
| 32 | auto [saturate, type_b, op_b, type_c, | ||
| 33 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { | ||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::HFMA2_CR: | ||
| 36 | neg_b = instr.hfma2.negate_b; | ||
| 37 | neg_c = instr.hfma2.negate_c; | ||
| 38 | return {instr.hfma2.saturate, HalfType::F32, | ||
| 39 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 40 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 41 | case OpCode::Id::HFMA2_RC: | ||
| 42 | neg_b = instr.hfma2.negate_b; | ||
| 43 | neg_c = instr.hfma2.negate_c; | ||
| 44 | return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), | ||
| 45 | HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 46 | case OpCode::Id::HFMA2_RR: | ||
| 47 | neg_b = instr.hfma2.rr.negate_b; | ||
| 48 | neg_c = instr.hfma2.rr.negate_c; | ||
| 49 | return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), | ||
| 50 | instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; | ||
| 51 | case OpCode::Id::HFMA2_IMM_R: | ||
| 52 | neg_c = instr.hfma2.negate_c; | ||
| 53 | return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), | ||
| 54 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 55 | default: | ||
| 56 | return {false, identity, Immediate(0), identity, Immediate(0)}; | ||
| 57 | } | ||
| 58 | }(); | ||
| 59 | |||
| 60 | const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); | ||
| 61 | op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); | ||
| 62 | op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); | ||
| 63 | |||
| 64 | Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); | ||
| 65 | value = GetSaturatedHalfFloat(value, saturate); | ||
| 66 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | ||
| 67 | |||
| 68 | SetRegister(bb, instr.gpr0, value); | ||
| 69 | |||
| 70 | return pc; | ||
| 71 | } | ||
| 72 | |||
| 73 | } // namespace VideoCommon::Shader | ||
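HFMA2 is the packed-half counterpart of FFMA: two independent fused multiply-adds, one per 16-bit lane, with the merge step preserving untouched lanes of the destination. A two-lane sketch of the per-lane arithmetic (shown in float for readability; the hardware operates on f16):

```cpp
#include <array>
#include <cmath>
#include <cstdio>

std::array<float, 2> EmulateHfma2(std::array<float, 2> a, std::array<float, 2> b,
                                  std::array<float, 2> c, bool neg_b, bool neg_c) {
    std::array<float, 2> d{};
    for (int i = 0; i < 2; ++i) {
        const float bi = neg_b ? -b[i] : b[i];
        const float ci = neg_c ? -c[i] : c[i];
        d[i] = std::fma(a[i], bi, ci); // one FMA per lane
    }
    return d;
}

int main() {
    const auto d = EmulateHfma2({1, 2}, {3, 4}, {5, 6}, false, false);
    std::printf("%f %f\n", d[0], d[1]); // 8 14
}
```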
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null | |||
| @@ -1,536 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | #include "video_core/textures/texture.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Instruction; | ||
| 21 | using Tegra::Shader::OpCode; | ||
| 22 | using Tegra::Shader::PredCondition; | ||
| 23 | using Tegra::Shader::StoreType; | ||
| 24 | using Tegra::Texture::ComponentType; | ||
| 25 | using Tegra::Texture::TextureFormat; | ||
| 26 | using Tegra::Texture::TICEntry; | ||
| 27 | |||
| 28 | namespace { | ||
| 29 | |||
| 30 | ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, | ||
| 31 | std::size_t component) { | ||
| 32 | const TextureFormat format{descriptor.format}; | ||
| 33 | switch (format) { | ||
| 34 | case TextureFormat::R16G16B16A16: | ||
| 35 | case TextureFormat::R32G32B32A32: | ||
| 36 | case TextureFormat::R32G32B32: | ||
| 37 | case TextureFormat::R32G32: | ||
| 38 | case TextureFormat::R16G16: | ||
| 39 | case TextureFormat::R32: | ||
| 40 | case TextureFormat::R16: | ||
| 41 | case TextureFormat::R8: | ||
| 42 | case TextureFormat::R1: | ||
| 43 | if (component == 0) { | ||
| 44 | return descriptor.r_type; | ||
| 45 | } | ||
| 46 | if (component == 1) { | ||
| 47 | return descriptor.g_type; | ||
| 48 | } | ||
| 49 | if (component == 2) { | ||
| 50 | return descriptor.b_type; | ||
| 51 | } | ||
| 52 | if (component == 3) { | ||
| 53 | return descriptor.a_type; | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | case TextureFormat::A8R8G8B8: | ||
| 57 | if (component == 0) { | ||
| 58 | return descriptor.a_type; | ||
| 59 | } | ||
| 60 | if (component == 1) { | ||
| 61 | return descriptor.r_type; | ||
| 62 | } | ||
| 63 | if (component == 2) { | ||
| 64 | return descriptor.g_type; | ||
| 65 | } | ||
| 66 | if (component == 3) { | ||
| 67 | return descriptor.b_type; | ||
| 68 | } | ||
| 69 | break; | ||
| 70 | case TextureFormat::A2B10G10R10: | ||
| 71 | case TextureFormat::A4B4G4R4: | ||
| 72 | case TextureFormat::A5B5G5R1: | ||
| 73 | case TextureFormat::A1B5G5R5: | ||
| 74 | if (component == 0) { | ||
| 75 | return descriptor.a_type; | ||
| 76 | } | ||
| 77 | if (component == 1) { | ||
| 78 | return descriptor.b_type; | ||
| 79 | } | ||
| 80 | if (component == 2) { | ||
| 81 | return descriptor.g_type; | ||
| 82 | } | ||
| 83 | if (component == 3) { | ||
| 84 | return descriptor.r_type; | ||
| 85 | } | ||
| 86 | break; | ||
| 87 | case TextureFormat::R32_B24G8: | ||
| 88 | if (component == 0) { | ||
| 89 | return descriptor.r_type; | ||
| 90 | } | ||
| 91 | if (component == 1) { | ||
| 92 | return descriptor.b_type; | ||
| 93 | } | ||
| 94 | if (component == 2) { | ||
| 95 | return descriptor.g_type; | ||
| 96 | } | ||
| 97 | break; | ||
| 98 | case TextureFormat::B5G6R5: | ||
| 99 | case TextureFormat::B6G5R5: | ||
| 100 | case TextureFormat::B10G11R11: | ||
| 101 | if (component == 0) { | ||
| 102 | return descriptor.b_type; | ||
| 103 | } | ||
| 104 | if (component == 1) { | ||
| 105 | return descriptor.g_type; | ||
| 106 | } | ||
| 107 | if (component == 2) { | ||
| 108 | return descriptor.r_type; | ||
| 109 | } | ||
| 110 | break; | ||
| 111 | case TextureFormat::R24G8: | ||
| 112 | case TextureFormat::R8G24: | ||
| 113 | case TextureFormat::R8G8: | ||
| 114 | case TextureFormat::G4R4: | ||
| 115 | if (component == 0) { | ||
| 116 | return descriptor.g_type; | ||
| 117 | } | ||
| 118 | if (component == 1) { | ||
| 119 | return descriptor.r_type; | ||
| 120 | } | ||
| 121 | break; | ||
| 122 | default: | ||
| 123 | break; | ||
| 124 | } | ||
| 125 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 126 | return ComponentType::FLOAT; | ||
| 127 | } | ||
| 128 | |||
| 129 | bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { | ||
| 130 | constexpr u8 R = 0b0001; | ||
| 131 | constexpr u8 G = 0b0010; | ||
| 132 | constexpr u8 B = 0b0100; | ||
| 133 | constexpr u8 A = 0b1000; | ||
| 134 | constexpr std::array<u8, 16> mask = { | ||
| 135 | 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), | ||
| 136 | (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; | ||
| 137 | return std::bitset<4>{mask.at(component_mask)}.test(component); | ||
| 138 | } | ||
| 139 | |||
| 140 | u32 GetComponentSize(TextureFormat format, std::size_t component) { | ||
| 141 | switch (format) { | ||
| 142 | case TextureFormat::R32G32B32A32: | ||
| 143 | return 32; | ||
| 144 | case TextureFormat::R16G16B16A16: | ||
| 145 | return 16; | ||
| 146 | case TextureFormat::R32G32B32: | ||
| 147 | return component <= 2 ? 32 : 0; | ||
| 148 | case TextureFormat::R32G32: | ||
| 149 | return component <= 1 ? 32 : 0; | ||
| 150 | case TextureFormat::R16G16: | ||
| 151 | return component <= 1 ? 16 : 0; | ||
| 152 | case TextureFormat::R32: | ||
| 153 | return component == 0 ? 32 : 0; | ||
| 154 | case TextureFormat::R16: | ||
| 155 | return component == 0 ? 16 : 0; | ||
| 156 | case TextureFormat::R8: | ||
| 157 | return component == 0 ? 8 : 0; | ||
| 158 | case TextureFormat::R1: | ||
| 159 | return component == 0 ? 1 : 0; | ||
| 160 | case TextureFormat::A8R8G8B8: | ||
| 161 | return 8; | ||
| 162 | case TextureFormat::A2B10G10R10: | ||
| 163 | return (component == 3 || component == 2 || component == 1) ? 10 : 2; | ||
| 164 | case TextureFormat::A4B4G4R4: | ||
| 165 | return 4; | ||
| 166 | case TextureFormat::A5B5G5R1: | ||
| 167 | return (component == 0 || component == 1 || component == 2) ? 5 : 1; | ||
| 168 | case TextureFormat::A1B5G5R5: | ||
| 169 | return (component == 1 || component == 2 || component == 3) ? 5 : 1; | ||
| 170 | case TextureFormat::R32_B24G8: | ||
| 171 | if (component == 0) { | ||
| 172 | return 32; | ||
| 173 | } | ||
| 174 | if (component == 1) { | ||
| 175 | return 24; | ||
| 176 | } | ||
| 177 | if (component == 2) { | ||
| 178 | return 8; | ||
| 179 | } | ||
| 180 | return 0; | ||
| 181 | case TextureFormat::B5G6R5: | ||
| 182 | if (component == 0 || component == 2) { | ||
| 183 | return 5; | ||
| 184 | } | ||
| 185 | if (component == 1) { | ||
| 186 | return 6; | ||
| 187 | } | ||
| 188 | return 0; | ||
| 189 | case TextureFormat::B6G5R5: | ||
| 190 | if (component == 1 || component == 2) { | ||
| 191 | return 5; | ||
| 192 | } | ||
| 193 | if (component == 0) { | ||
| 194 | return 6; | ||
| 195 | } | ||
| 196 | return 0; | ||
| 197 | case TextureFormat::B10G11R11: | ||
| 198 | if (component == 1 || component == 2) { | ||
| 199 | return 11; | ||
| 200 | } | ||
| 201 | if (component == 0) { | ||
| 202 | return 10; | ||
| 203 | } | ||
| 204 | return 0; | ||
| 205 | case TextureFormat::R24G8: | ||
| 206 | if (component == 0) { | ||
| 207 | return 8; | ||
| 208 | } | ||
| 209 | if (component == 1) { | ||
| 210 | return 24; | ||
| 211 | } | ||
| 212 | return 0; | ||
| 213 | case TextureFormat::R8G24: | ||
| 214 | if (component == 0) { | ||
| 215 | return 24; | ||
| 216 | } | ||
| 217 | if (component == 1) { | ||
| 218 | return 8; | ||
| 219 | } | ||
| 220 | return 0; | ||
| 221 | case TextureFormat::R8G8: | ||
| 222 | return (component == 0 || component == 1) ? 8 : 0; | ||
| 223 | case TextureFormat::G4R4: | ||
| 224 | return (component == 0 || component == 1) ? 4 : 0; | ||
| 225 | default: | ||
| 226 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 227 | return 0; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | std::size_t GetImageComponentMask(TextureFormat format) { | ||
| 232 | constexpr u8 R = 0b0001; | ||
| 233 | constexpr u8 G = 0b0010; | ||
| 234 | constexpr u8 B = 0b0100; | ||
| 235 | constexpr u8 A = 0b1000; | ||
| 236 | switch (format) { | ||
| 237 | case TextureFormat::R32G32B32A32: | ||
| 238 | case TextureFormat::R16G16B16A16: | ||
| 239 | case TextureFormat::A8R8G8B8: | ||
| 240 | case TextureFormat::A2B10G10R10: | ||
| 241 | case TextureFormat::A4B4G4R4: | ||
| 242 | case TextureFormat::A5B5G5R1: | ||
| 243 | case TextureFormat::A1B5G5R5: | ||
| 244 | return std::size_t{R | G | B | A}; | ||
| 245 | case TextureFormat::R32G32B32: | ||
| 246 | case TextureFormat::R32_B24G8: | ||
| 247 | case TextureFormat::B5G6R5: | ||
| 248 | case TextureFormat::B6G5R5: | ||
| 249 | case TextureFormat::B10G11R11: | ||
| 250 | return std::size_t{R | G | B}; | ||
| 251 | case TextureFormat::R32G32: | ||
| 252 | case TextureFormat::R16G16: | ||
| 253 | case TextureFormat::R24G8: | ||
| 254 | case TextureFormat::R8G24: | ||
| 255 | case TextureFormat::R8G8: | ||
| 256 | case TextureFormat::G4R4: | ||
| 257 | return std::size_t{R | G}; | ||
| 258 | case TextureFormat::R32: | ||
| 259 | case TextureFormat::R16: | ||
| 260 | case TextureFormat::R8: | ||
| 261 | case TextureFormat::R1: | ||
| 262 | return std::size_t{R}; | ||
| 263 | default: | ||
| 264 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 265 | return std::size_t{R | G | B | A}; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | |||
| 269 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | ||
| 270 | switch (image_type) { | ||
| 271 | case Tegra::Shader::ImageType::Texture1D: | ||
| 272 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 273 | return 1; | ||
| 274 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 275 | case Tegra::Shader::ImageType::Texture2D: | ||
| 276 | return 2; | ||
| 277 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 278 | case Tegra::Shader::ImageType::Texture3D: | ||
| 279 | return 3; | ||
| 280 | } | ||
| 281 | UNREACHABLE(); | ||
| 282 | return 1; | ||
| 283 | } | ||
| 284 | } // Anonymous namespace | ||
| 285 | |||
| 286 | std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, | ||
| 287 | Node original_value) { | ||
| 288 | switch (component_type) { | ||
| 289 | case ComponentType::SNORM: { | ||
| 290 | // range [-1.0, 1.0] | ||
| 291 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 292 | Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); | ||
| 293 | cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); | ||
| 294 | return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; | ||
| 295 | } | ||
| 296 | case ComponentType::SINT: | ||
| 297 | case ComponentType::UNORM: { | ||
| 298 | bool is_signed = component_type == ComponentType::SINT; | ||
| 299 | // UNORM range [0.0, 1.0] | ||
| 300 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 301 | Immediate(static_cast<float>(1 << component_size) - 1.f)); | ||
| 302 | return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), | ||
| 303 | is_signed}; | ||
| 304 | } | ||
| 305 | case ComponentType::UINT: // range [0, (1 << component_size) - 1] | ||
| 306 | return {std::move(original_value), false}; | ||
| 307 | case ComponentType::FLOAT: | ||
| 308 | if (component_size == 16) { | ||
| 309 | return {Operation(OperationCode::HCastFloat, original_value), true}; | ||
| 310 | } else { | ||
| 311 | return {std::move(original_value), true}; | ||
| 312 | } | ||
| 313 | default: | ||
| 314 | UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); | ||
| 315 | return {std::move(original_value), true}; | ||
| 316 | } | ||
| 317 | } | ||
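GetComponentValue above performs the reverse of normalized-format decoding: a float the shader sees in a normalized range is mapped back to the raw integer bits the surface holds. A worked example of the UNORM branch, assuming 8-bit components for illustration:

```cpp
#include <cstdint>
#include <cstdio>

// UNORM reverse conversion: a float in [0, 1] scales to [0, 2^size - 1].
uint32_t UnormToBits(float value, uint32_t component_size) {
    const float scale = static_cast<float>(1u << component_size) - 1.0f;
    return static_cast<uint32_t>(value * scale);
}

int main() {
    std::printf("%u %u %u\n", UnormToBits(0.0f, 8), UnormToBits(0.5f, 8), UnormToBits(1.0f, 8));
    // prints: 0 127 255
}
```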
| 318 | |||
| 319 | u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | ||
| 320 | const Instruction instr = {program_code[pc]}; | ||
| 321 | const auto opcode = OpCode::Decode(instr); | ||
| 322 | |||
| 323 | const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { | ||
| 324 | std::vector<Node> coords; | ||
| 325 | const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; | ||
| 326 | coords.reserve(num_coords); | ||
| 327 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 328 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 329 | } | ||
| 330 | return coords; | ||
| 331 | }; | ||
| 332 | |||
| 333 | switch (opcode->get().GetId()) { | ||
| 334 | case OpCode::Id::SULD: { | ||
| 335 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 336 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 337 | |||
| 338 | const auto type{instr.suldst.image_type}; | ||
| 339 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 340 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 341 | image.MarkRead(); | ||
| 342 | |||
| 343 | if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { | ||
| 344 | u32 indexer = 0; | ||
| 345 | for (u32 element = 0; element < 4; ++element) { | ||
| 346 | if (!instr.suldst.IsComponentEnabled(element)) { | ||
| 347 | continue; | ||
| 348 | } | ||
| 349 | MetaImage meta{image, {}, element}; | ||
| 350 | Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); | ||
| 351 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 352 | } | ||
| 353 | for (u32 i = 0; i < indexer; ++i) { | ||
| 354 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 355 | } | ||
| 356 | } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { | ||
| 357 | UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && | ||
| 358 | instr.suldst.GetStoreDataLayout() != StoreType::Bits64); | ||
| 359 | |||
| 360 | auto descriptor = [this, instr] { | ||
| 361 | std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor; | ||
| 362 | if (instr.suldst.is_immediate) { | ||
| 363 | sampler_descriptor = | ||
| 364 | registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); | ||
| 365 | } else { | ||
| 366 | const Node image_register = GetRegister(instr.gpr39); | ||
| 367 | const auto result = TrackCbuf(image_register, global_code, | ||
| 368 | static_cast<s64>(global_code.size())); | ||
| 369 | const auto buffer = std::get<1>(result); | ||
| 370 | const auto offset = std::get<2>(result); | ||
| 371 | sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); | ||
| 372 | } | ||
| 373 | if (!sampler_descriptor) { | ||
| 374 | UNREACHABLE_MSG("Failed to obtain image descriptor"); | ||
| 375 | } | ||
| 376 | return *sampler_descriptor; | ||
| 377 | }(); | ||
| 378 | |||
| 379 | const auto comp_mask = GetImageComponentMask(descriptor.format); | ||
| 380 | |||
| 381 | switch (instr.suldst.GetStoreDataLayout()) { | ||
| 382 | case StoreType::Bits32: | ||
| 383 | case StoreType::Bits64: { | ||
| 384 | u32 indexer = 0; | ||
| 385 | u32 shifted_counter = 0; | ||
| 386 | Node value = Immediate(0); | ||
| 387 | for (u32 element = 0; element < 4; ++element) { | ||
| 388 | if (!IsComponentEnabled(comp_mask, element)) { | ||
| 389 | continue; | ||
| 390 | } | ||
| 391 | const auto component_type = GetComponentType(descriptor, element); | ||
| 392 | const auto component_size = GetComponentSize(descriptor.format, element); | ||
| 393 | MetaImage meta{image, {}, element}; | ||
| 394 | |||
| 395 | auto [converted_value, is_signed] = GetComponentValue( | ||
| 396 | component_type, component_size, | ||
| 397 | Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); | ||
| 398 | |||
| 399 | // Shift the element into its position within the packed word | ||
| 400 | const auto shifted = shifted_counter; | ||
| 401 | if (shifted > 0) { | ||
| 402 | converted_value = | ||
| 403 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, | ||
| 404 | std::move(converted_value), Immediate(shifted)); | ||
| 405 | } | ||
| 406 | shifted_counter += component_size; | ||
| 407 | |||
| 408 | // OR the converted value into the result | ||
| 409 | value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); | ||
| 410 | |||
| 411 | // Once a full 32-bit word has been accumulated, save it into a temporary | ||
| 412 | if (shifted_counter >= 32) { | ||
| 413 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 414 | // Reset the counter and value to start packing the next word | ||
| 415 | value = Immediate(0); | ||
| 416 | shifted_counter = 0; | ||
| 417 | } | ||
| 418 | } | ||
| 419 | for (u32 i = 0; i < indexer; ++i) { | ||
| 420 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 421 | } | ||
| 422 | break; | ||
| 423 | } | ||
| 424 | default: | ||
| 425 | UNREACHABLE(); | ||
| 426 | break; | ||
| 427 | } | ||
| 428 | } | ||
| 429 | break; | ||
| 430 | } | ||
| 431 | case OpCode::Id::SUST: { | ||
| 432 | UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); | ||
| 433 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 434 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 435 | UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA | ||
| 436 | |||
| 437 | std::vector<Node> values; | ||
| 438 | constexpr std::size_t hardcoded_size{4}; | ||
| 439 | for (std::size_t i = 0; i < hardcoded_size; ++i) { | ||
| 440 | values.push_back(GetRegister(instr.gpr0.Value() + i)); | ||
| 441 | } | ||
| 442 | |||
| 443 | const auto type{instr.suldst.image_type}; | ||
| 444 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 445 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 446 | image.MarkWrite(); | ||
| 447 | |||
| 448 | MetaImage meta{image, std::move(values)}; | ||
| 449 | bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | case OpCode::Id::SUATOM: { | ||
| 453 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); | ||
| 454 | |||
| 455 | const OperationCode operation_code = [instr] { | ||
| 456 | switch (instr.suatom_d.operation_type) { | ||
| 457 | case Tegra::Shader::ImageAtomicOperationType::S32: | ||
| 458 | case Tegra::Shader::ImageAtomicOperationType::U32: | ||
| 459 | switch (instr.suatom_d.operation) { | ||
| 460 | case Tegra::Shader::ImageAtomicOperation::Add: | ||
| 461 | return OperationCode::AtomicImageAdd; | ||
| 462 | case Tegra::Shader::ImageAtomicOperation::And: | ||
| 463 | return OperationCode::AtomicImageAnd; | ||
| 464 | case Tegra::Shader::ImageAtomicOperation::Or: | ||
| 465 | return OperationCode::AtomicImageOr; | ||
| 466 | case Tegra::Shader::ImageAtomicOperation::Xor: | ||
| 467 | return OperationCode::AtomicImageXor; | ||
| 468 | case Tegra::Shader::ImageAtomicOperation::Exch: | ||
| 469 | return OperationCode::AtomicImageExchange; | ||
| 470 | default: | ||
| 471 | break; | ||
| 472 | } | ||
| 473 | break; | ||
| 474 | default: | ||
| 475 | break; | ||
| 476 | } | ||
| 477 | UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", | ||
| 478 | static_cast<u64>(instr.suatom_d.operation.Value()), | ||
| 479 | static_cast<u64>(instr.suatom_d.operation_type.Value())); | ||
| 480 | return OperationCode::AtomicImageAdd; | ||
| 481 | }(); | ||
| 482 | |||
| 483 | Node value = GetRegister(instr.gpr0); | ||
| 484 | |||
| 485 | const auto type = instr.suatom_d.image_type; | ||
| 486 | auto& image = GetImage(instr.image, type); | ||
| 487 | image.MarkAtomic(); | ||
| 488 | |||
| 489 | MetaImage meta{image, {std::move(value)}}; | ||
| 490 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); | ||
| 491 | break; | ||
| 492 | } | ||
| 493 | default: | ||
| 494 | UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); | ||
| 495 | } | ||
| 496 | |||
| 497 | return pc; | ||
| 498 | } | ||
| 499 | |||
| 500 | ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { | ||
| 501 | const auto offset = static_cast<u32>(image.index.Value()); | ||
| 502 | |||
| 503 | const auto it = | ||
| 504 | std::find_if(std::begin(used_images), std::end(used_images), | ||
| 505 | [offset](const ImageEntry& entry) { return entry.offset == offset; }); | ||
| 506 | if (it != std::end(used_images)) { | ||
| 507 | ASSERT(!it->is_bindless && it->type == type); | ||
| 508 | return *it; | ||
| 509 | } | ||
| 510 | |||
| 511 | const auto next_index = static_cast<u32>(used_images.size()); | ||
| 512 | return used_images.emplace_back(next_index, offset, type); | ||
| 513 | } | ||
| 514 | |||
| 515 | ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { | ||
| 516 | const Node image_register = GetRegister(reg); | ||
| 517 | const auto result = | ||
| 518 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); | ||
| 519 | |||
| 520 | const auto buffer = std::get<1>(result); | ||
| 521 | const auto offset = std::get<2>(result); | ||
| 522 | |||
| 523 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), | ||
| 524 | [buffer, offset](const ImageEntry& entry) { | ||
| 525 | return entry.buffer == buffer && entry.offset == offset; | ||
| 526 | }); | ||
| 527 | if (it != std::end(used_images)) { | ||
| 528 | ASSERT(it->is_bindless && it->type == type); | ||
| 529 | return *it; | ||
| 530 | } | ||
| 531 | |||
| 532 | const auto next_index = static_cast<u32>(used_images.size()); | ||
| 533 | return used_images.emplace_back(next_index, offset, buffer, type); | ||
| 534 | } | ||
| 535 | |||
| 536 | } // namespace VideoCommon::Shader | ||
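The D_BA repacking loop in DecodeImage deserves a plainer rendering: per-component values are converted, shifted into place, and OR'd together until 32 bits have accumulated, at which point a destination word is emitted. A self-contained sketch of that accumulation (the component values and sizes here are illustrative, not taken from a real descriptor):

```cpp
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Pack (value, bit_size) components into 32-bit words, low bits first.
std::vector<uint32_t> PackComponents(const std::vector<std::pair<uint32_t, uint32_t>>& comps) {
    std::vector<uint32_t> words;
    uint32_t value = 0;
    uint32_t shifted = 0;
    for (const auto& [component_value, component_size] : comps) {
        value |= component_value << shifted; // shift element into position
        shifted += component_size;
        if (shifted >= 32) {                 // a full word accumulated
            words.push_back(value);
            value = 0;
            shifted = 0;
        }
    }
    return words;
}

int main() {
    // Four 8-bit components (as in A8R8G8B8) pack into exactly one word.
    const auto words = PackComponents({{0x11, 8}, {0x22, 8}, {0x33, 8}, {0x44, 8}});
    std::printf("0x%08X\n", words[0]); // 0x44332211
}
```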
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null | |||
| @@ -1,49 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "video_core/engines/shader_bytecode.h" | ||
| 7 | #include "video_core/shader/node_helper.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | |||
| 18 | const Node op_a = GetRegister(instr.gpr8); | ||
| 19 | const Node op_b = [&]() { | ||
| 20 | if (instr.is_b_imm) { | ||
| 21 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 22 | } else if (instr.is_b_gpr) { | ||
| 23 | return GetRegister(instr.gpr20); | ||
| 24 | } else { | ||
| 25 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 26 | } | ||
| 27 | }(); | ||
| 28 | |||
| 29 | // The iset instruction writes 1.0f (when the bf bit is set) or integer -1 (when it is clear) | ||
| 30 | // to the destination register if the condition holds, and 0 otherwise. | ||
| 31 | const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 32 | const Node first_pred = | ||
| 33 | GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); | ||
| 34 | |||
| 35 | const OperationCode combiner = GetPredicateCombiner(instr.iset.op); | ||
| 36 | |||
| 37 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 38 | |||
| 39 | const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 40 | const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 41 | const Node value = | ||
| 42 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 43 | |||
| 44 | SetRegister(bb, instr.gpr0, value); | ||
| 45 | |||
| 46 | return pc; | ||
| 47 | } | ||
| 48 | |||
| 49 | } // namespace VideoCommon::Shader | ||
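ISET mirrors FSET except that the comparison itself is integer and split on instr.iset.is_signed, which changes what the same 32-bit pattern means. A small illustration of that split (the select/combiner plumbing matches the FSET sketch earlier):

```cpp
#include <cstdint>
#include <cstdio>

// Signed vs. unsigned "less than" over the same raw 32-bit operands.
bool IntCompareLess(uint32_t a, uint32_t b, bool is_signed) {
    if (is_signed) {
        return static_cast<int32_t>(a) < static_cast<int32_t>(b);
    }
    return a < b;
}

int main() {
    // 0xFFFFFFFF is -1 when signed, UINT32_MAX when unsigned.
    std::printf("%d %d\n", IntCompareLess(0xFFFFFFFFu, 0, true),
                IntCompareLess(0xFFFFFFFFu, 0, false)); // 1 0
}
```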
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | |||
| 20 | const Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | const Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | // The constant predicate cannot be used as a destination. | ||
| 33 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 34 | |||
| 35 | const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); | ||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); | ||
| 38 | |||
| 39 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | SetPredicate(bb, instr.isetp.pred3, value); | ||
| 43 | |||
| 44 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 45 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | ||
| 46 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 47 | SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null | |||
| @@ -1,493 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/alignment.h" | ||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/logging/log.h" | ||
| 15 | #include "video_core/engines/shader_bytecode.h" | ||
| 16 | #include "video_core/shader/node_helper.h" | ||
| 17 | #include "video_core/shader/shader_ir.h" | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | ||
| 20 | |||
| 21 | using std::move; | ||
| 22 | using Tegra::Shader::AtomicOp; | ||
| 23 | using Tegra::Shader::AtomicType; | ||
| 24 | using Tegra::Shader::Attribute; | ||
| 25 | using Tegra::Shader::GlobalAtomicType; | ||
| 26 | using Tegra::Shader::Instruction; | ||
| 27 | using Tegra::Shader::OpCode; | ||
| 28 | using Tegra::Shader::Register; | ||
| 29 | using Tegra::Shader::StoreType; | ||
| 30 | |||
| 31 | namespace { | ||
| 32 | |||
| 33 | OperationCode GetAtomOperation(AtomicOp op) { | ||
| 34 | switch (op) { | ||
| 35 | case AtomicOp::Add: | ||
| 36 | return OperationCode::AtomicIAdd; | ||
| 37 | case AtomicOp::Min: | ||
| 38 | return OperationCode::AtomicIMin; | ||
| 39 | case AtomicOp::Max: | ||
| 40 | return OperationCode::AtomicIMax; | ||
| 41 | case AtomicOp::And: | ||
| 42 | return OperationCode::AtomicIAnd; | ||
| 43 | case AtomicOp::Or: | ||
| 44 | return OperationCode::AtomicIOr; | ||
| 45 | case AtomicOp::Xor: | ||
| 46 | return OperationCode::AtomicIXor; | ||
| 47 | case AtomicOp::Exch: | ||
| 48 | return OperationCode::AtomicIExchange; | ||
| 49 | default: | ||
| 50 | UNIMPLEMENTED_MSG("op={}", op); | ||
| 51 | return OperationCode::AtomicIAdd; | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { | ||
| 56 | return uniform_type == Tegra::Shader::UniformType::UnsignedByte || | ||
| 57 | uniform_type == Tegra::Shader::UniformType::UnsignedShort; | ||
| 58 | } | ||
| 59 | |||
| 60 | u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { | ||
| 61 | switch (uniform_type) { | ||
| 62 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 63 | return 0b11; | ||
| 64 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 65 | return 0b10; | ||
| 66 | default: | ||
| 67 | UNREACHABLE(); | ||
| 68 | return 0; | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { | ||
| 73 | switch (uniform_type) { | ||
| 74 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 75 | return 8; | ||
| 76 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 77 | return 16; | ||
| 78 | case Tegra::Shader::UniformType::Single: | ||
| 79 | return 32; | ||
| 80 | case Tegra::Shader::UniformType::Double: | ||
| 81 | return 64; | ||
| 82 | case Tegra::Shader::UniformType::Quad: | ||
| 83 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 84 | return 128; | ||
| 85 | default: | ||
| 86 | UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); | ||
| 87 | return 32; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { | ||
| 92 | Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); | ||
| 93 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||
| 94 | return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); | ||
| 95 | } | ||
| 96 | |||
| 97 | Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { | ||
| 98 | Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); | ||
| 99 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||
| 100 | return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), | ||
| 101 | Immediate(size)); | ||
| 102 | } | ||
| 103 | |||
| 104 | Node Sign16Extend(Node value) { | ||
| 105 | Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); | ||
| 106 | Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); | ||
| 107 | Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); | ||
| 108 | return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); | ||
| 109 | } | ||
| 110 | |||
| 111 | } // Anonymous namespace | ||
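The anonymous-namespace helpers above implement unaligned byte/halfword access on top of 32-bit memory words: the masked low address bits, scaled by 8, give a bit offset, and a bitfield extract (or insert, for stores) picks out or replaces the addressed bits. A plain C++ rendering of the ExtractUnaligned logic for illustration, not yuzu code:

```cpp
#include <cstdint>
#include <cstdio>

// Mirrors ExtractUnaligned: mask selects the in-word byte address bits,
// which are scaled by 8 (shift left by 3) into a bit offset.
uint32_t ExtractUnalignedBits(uint32_t value, uint32_t address, uint32_t mask, uint32_t size) {
    const uint32_t bit_offset = (address & mask) << 3;
    const uint32_t field_mask = (size < 32) ? ((1u << size) - 1u) : ~0u;
    return (value >> bit_offset) & field_mask;
}

int main() {
    // Load the byte at address 0x1002 out of the aligned word 0xAABBCCDD.
    std::printf("0x%02X\n", ExtractUnalignedBits(0xAABBCCDDu, 0x1002, 0b11, 8)); // 0xBB
}
```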
| 112 | |||
| 113 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | ||
| 114 | const Instruction instr = {program_code[pc]}; | ||
| 115 | const auto opcode = OpCode::Decode(instr); | ||
| 116 | |||
| 117 | switch (opcode->get().GetId()) { | ||
| 118 | case OpCode::Id::LD_A: { | ||
| 119 | // Note: Shouldn't this use flat interpolation mode, i.e. no interpolation at all? | ||
| 120 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 121 | "Indirect attribute loads are not supported"); | ||
| 122 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 123 | "Unaligned attribute loads are not supported"); | ||
| 124 | UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && | ||
| 125 | instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, | ||
| 126 | "Non-32 bits PHYS reads are not implemented"); | ||
| 127 | |||
| 128 | const Node buffer{GetRegister(instr.gpr39)}; | ||
| 129 | |||
| 130 | u64 next_element = instr.attribute.fmt20.element; | ||
| 131 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 132 | |||
| 133 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 134 | const Node attribute{instr.attribute.fmt20.IsPhysical() | ||
| 135 | ? GetPhysicalInputAttribute(instr.gpr8, buffer) | ||
| 136 | : GetInputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 137 | next_element, buffer)}; | ||
| 138 | |||
| 139 | SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); | ||
| 140 | |||
| 141 | // Load the next attribute element into the following register. If the element | ||
| 142 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 143 | // attribute. | ||
| 144 | next_element = (next_element + 1) % 4; | ||
| 145 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 146 | }; | ||
| 147 | |||
| 148 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 149 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 150 | LoadNextElement(reg_offset); | ||
| 151 | } | ||
| 152 | break; | ||
| 153 | } | ||
| 154 | case OpCode::Id::LD_C: { | ||
| 155 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 156 | |||
| 157 | Node index = GetRegister(instr.gpr8); | ||
| 158 | |||
| 159 | const Node op_a = | ||
| 160 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 161 | |||
| 162 | switch (instr.ld_c.type.Value()) { | ||
| 163 | case Tegra::Shader::UniformType::Single: | ||
| 164 | SetRegister(bb, instr.gpr0, op_a); | ||
| 165 | break; | ||
| 166 | |||
| 167 | case Tegra::Shader::UniformType::Double: { | ||
| 168 | const Node op_b = | ||
| 169 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | ||
| 170 | |||
| 171 | SetTemporary(bb, 0, op_a); | ||
| 172 | SetTemporary(bb, 1, op_b); | ||
| 173 | SetRegister(bb, instr.gpr0, GetTemporary(0)); | ||
| 174 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | default: | ||
| 178 | UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); | ||
| 179 | } | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | case OpCode::Id::LD_L: | ||
| 183 | LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); | ||
| 184 | [[fallthrough]]; | ||
| 185 | case OpCode::Id::LD_S: { | ||
| 186 | const auto GetAddress = [&](s32 offset) { | ||
| 187 | ASSERT(offset % 4 == 0); | ||
| 188 | const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 189 | return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); | ||
| 190 | }; | ||
| 191 | const auto GetMemory = [&](s32 offset) { | ||
| 192 | return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) | ||
| 193 | : GetLocalMemory(GetAddress(offset)); | ||
| 194 | }; | ||
| 195 | |||
| 196 | switch (instr.ldst_sl.type.Value()) { | ||
| 197 | case StoreType::Signed16: | ||
| 198 | SetRegister(bb, instr.gpr0, | ||
| 199 | Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); | ||
| 200 | break; | ||
| 201 | case StoreType::Bits32: | ||
| 202 | case StoreType::Bits64: | ||
| 203 | case StoreType::Bits128: { | ||
| 204 | const u32 count = [&] { | ||
| 205 | switch (instr.ldst_sl.type.Value()) { | ||
| 206 | case StoreType::Bits32: | ||
| 207 | return 1; | ||
| 208 | case StoreType::Bits64: | ||
| 209 | return 2; | ||
| 210 | case StoreType::Bits128: | ||
| 211 | return 4; | ||
| 212 | default: | ||
| 213 | UNREACHABLE(); | ||
| 214 | return 0; | ||
| 215 | } | ||
| 216 | }(); | ||
| 217 | for (u32 i = 0; i < count; ++i) { | ||
| 218 | SetTemporary(bb, i, GetMemory(i * 4)); | ||
| 219 | } | ||
| 220 | for (u32 i = 0; i < count; ++i) { | ||
| 221 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 222 | } | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | default: | ||
| 226 | UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), | ||
| 227 | instr.ldst_sl.type.Value()); | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | case OpCode::Id::LD: | ||
| 232 | case OpCode::Id::LDG: { | ||
| 233 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 234 | switch (opcode->get().GetId()) { | ||
| 235 | case OpCode::Id::LD: | ||
| 236 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); | ||
| 237 | return instr.generic.type; | ||
| 238 | case OpCode::Id::LDG: | ||
| 239 | return instr.ldg.type; | ||
| 240 | default: | ||
| 241 | UNREACHABLE(); | ||
| 242 | return {}; | ||
| 243 | } | ||
| 244 | }(); | ||
| 245 | |||
| 246 | const auto [real_address_base, base_address, descriptor] = | ||
| 247 | TrackGlobalMemory(bb, instr, true, false); | ||
| 248 | |||
| 249 | const u32 size = GetMemorySize(type); | ||
| 250 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 251 | if (!real_address_base || !base_address) { | ||
| 252 | // Tracking failed, load zeroes. | ||
| 253 | for (u32 i = 0; i < count; ++i) { | ||
| 254 | SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); | ||
| 255 | } | ||
| 256 | break; | ||
| 257 | } | ||
| 258 | |||
| 259 | for (u32 i = 0; i < count; ++i) { | ||
| 260 | const Node it_offset = Immediate(i * 4); | ||
| 261 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | ||
| 262 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 263 | |||
| 264 | // To handle unaligned loads, compute the byte offset used to dereference global memory | ||
| 265 | // and extract those bytes from the loaded u32. | ||
| 266 | if (IsUnaligned(type)) { | ||
| 267 | gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); | ||
| 268 | } | ||
| 269 | |||
| 270 | SetTemporary(bb, i, gmem); | ||
| 271 | } | ||
| 272 | |||
| 273 | for (u32 i = 0; i < count; ++i) { | ||
| 274 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 275 | } | ||
| 276 | break; | ||
| 277 | } | ||
| 278 | case OpCode::Id::ST_A: { | ||
| 279 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 280 | "Indirect attribute loads are not supported"); | ||
| 281 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 282 | "Unaligned attribute loads are not supported"); | ||
| 283 | |||
| 284 | u64 element = instr.attribute.fmt20.element; | ||
| 285 | auto index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 286 | |||
| 287 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 288 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 289 | Node dest; | ||
| 290 | if (instr.attribute.fmt20.patch) { | ||
| 291 | const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element); | ||
| 292 | dest = MakeNode<PatchNode>(offset); | ||
| 293 | } else { | ||
| 294 | dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element, | ||
| 295 | GetRegister(instr.gpr39)); | ||
| 296 | } | ||
| 297 | const auto src = GetRegister(instr.gpr0.Value() + reg_offset); | ||
| 298 | |||
| 299 | bb.push_back(Operation(OperationCode::Assign, dest, src)); | ||
| 300 | |||
| 301 | // Load the next attribute element into the following register. If the element to load | ||
| 302 | // goes beyond the vec4 size, load the first element of the next attribute. | ||
| 303 | element = (element + 1) % 4; | ||
| 304 | index = index + (element == 0 ? 1 : 0); | ||
| 305 | } | ||
| 306 | break; | ||
| 307 | } | ||
| 308 | case OpCode::Id::ST_L: | ||
| 309 | LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); | ||
| 310 | [[fallthrough]]; | ||
| 311 | case OpCode::Id::ST_S: { | ||
| 312 | const auto GetAddress = [&](s32 offset) { | ||
| 313 | ASSERT(offset % 4 == 0); | ||
| 314 | const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 315 | return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); | ||
| 316 | }; | ||
| 317 | |||
| 318 | const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; | ||
| 319 | const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; | ||
| 320 | const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; | ||
| 321 | |||
| 322 | switch (instr.ldst_sl.type.Value()) { | ||
| 323 | case StoreType::Bits128: | ||
| 324 | (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); | ||
| 325 | (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); | ||
| 326 | [[fallthrough]]; | ||
| 327 | case StoreType::Bits64: | ||
| 328 | (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); | ||
| 329 | [[fallthrough]]; | ||
| 330 | case StoreType::Bits32: | ||
| 331 | (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); | ||
| 332 | break; | ||
| 333 | case StoreType::Unsigned16: | ||
| 334 | case StoreType::Signed16: { | ||
| 335 | Node address = GetAddress(0); | ||
| 336 | Node memory = (this->*get_memory)(address); | ||
| 337 | (this->*set_memory)( | ||
| 338 | bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); | ||
| 339 | break; | ||
| 340 | } | ||
| 341 | default: | ||
| 342 | UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), | ||
| 343 | instr.ldst_sl.type.Value()); | ||
| 344 | } | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | case OpCode::Id::ST: | ||
| 348 | case OpCode::Id::STG: { | ||
| 349 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 350 | switch (opcode->get().GetId()) { | ||
| 351 | case OpCode::Id::ST: | ||
| 352 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); | ||
| 353 | return instr.generic.type; | ||
| 354 | case OpCode::Id::STG: | ||
| 355 | return instr.stg.type; | ||
| 356 | default: | ||
| 357 | UNREACHABLE(); | ||
| 358 | return {}; | ||
| 359 | } | ||
| 360 | }(); | ||
| 361 | |||
| 362 | // Unaligned stores are read-modify-write, so we have to read memory too. | ||
| 363 | const bool is_read = IsUnaligned(type); | ||
| 364 | const auto [real_address_base, base_address, descriptor] = | ||
| 365 | TrackGlobalMemory(bb, instr, is_read, true); | ||
| 366 | if (!real_address_base || !base_address) { | ||
| 367 | // Tracking failed, skip the store. | ||
| 368 | break; | ||
| 369 | } | ||
| 370 | |||
| 371 | const u32 size = GetMemorySize(type); | ||
| 372 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 373 | for (u32 i = 0; i < count; ++i) { | ||
| 374 | const Node it_offset = Immediate(i * 4); | ||
| 375 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | ||
| 376 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 377 | Node value = GetRegister(instr.gpr0.Value() + i); | ||
| 378 | |||
| 379 | if (IsUnaligned(type)) { | ||
| 380 | const u32 mask = GetUnalignedMask(type); | ||
| 381 | value = InsertUnaligned(gmem, move(value), real_address, mask, size); | ||
| 382 | } | ||
| 383 | |||
| 384 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); | ||
| 385 | } | ||
| 386 | break; | ||
| 387 | } | ||
| 388 | case OpCode::Id::RED: { | ||
| 389 | UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", | ||
| 390 | instr.red.type.Value()); | ||
| 391 | const auto [real_address, base_address, descriptor] = | ||
| 392 | TrackGlobalMemory(bb, instr, true, true); | ||
| 393 | if (!real_address || !base_address) { | ||
| 394 | // Tracking failed, skip atomic. | ||
| 395 | break; | ||
| 396 | } | ||
| 397 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 398 | Node value = GetRegister(instr.gpr0); | ||
| 399 | bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); | ||
| 400 | break; | ||
| 401 | } | ||
| 402 | case OpCode::Id::ATOM: { | ||
| 403 | UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || | ||
| 404 | instr.atom.operation == AtomicOp::Dec || | ||
| 405 | instr.atom.operation == AtomicOp::SafeAdd, | ||
| 406 | "operation={}", instr.atom.operation.Value()); | ||
| 407 | UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || | ||
| 408 | instr.atom.type == GlobalAtomicType::U64 || | ||
| 409 | instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || | ||
| 410 | instr.atom.type == GlobalAtomicType::F32_FTZ_RN, | ||
| 411 | "type={}", instr.atom.type.Value()); | ||
| 412 | |||
| 413 | const auto [real_address, base_address, descriptor] = | ||
| 414 | TrackGlobalMemory(bb, instr, true, true); | ||
| 415 | if (!real_address || !base_address) { | ||
| 416 | // Tracking failed, skip atomic. | ||
| 417 | break; | ||
| 418 | } | ||
| 419 | |||
| 420 | const bool is_signed = | ||
| 421 | instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; | ||
| 422 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 423 | SetRegister(bb, instr.gpr0, | ||
| 424 | SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, | ||
| 425 | GetRegister(instr.gpr20))); | ||
| 426 | break; | ||
| 427 | } | ||
| 428 | case OpCode::Id::ATOMS: { | ||
| 429 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || | ||
| 430 | instr.atoms.operation == AtomicOp::Dec, | ||
| 431 | "operation={}", instr.atoms.operation.Value()); | ||
| 432 | UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || | ||
| 433 | instr.atoms.type == AtomicType::U64, | ||
| 434 | "type={}", instr.atoms.type.Value()); | ||
| 435 | const bool is_signed = | ||
| 436 | instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; | ||
| 437 | const s32 offset = instr.atoms.GetImmediateOffset(); | ||
| 438 | Node address = GetRegister(instr.gpr8); | ||
| 439 | address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); | ||
| 440 | SetRegister(bb, instr.gpr0, | ||
| 441 | SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, | ||
| 442 | GetSharedMemory(move(address)), GetRegister(instr.gpr20))); | ||
| 443 | break; | ||
| 444 | } | ||
| 445 | case OpCode::Id::AL2P: { | ||
| 446 | // Ignore al2p.direction since we don't care about it. | ||
| 447 | |||
| 448 | // Calculate emulation fake physical address. | ||
| 449 | const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; | ||
| 450 | const Node reg{GetRegister(instr.gpr8)}; | ||
| 451 | const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; | ||
| 452 | |||
| 453 | // Set the fake address to target register. | ||
| 454 | SetRegister(bb, instr.gpr0, fake_address); | ||
| 455 | |||
| 456 | // Signal the shader IR to declare all possible attributes and varyings | ||
| 457 | uses_physical_attributes = true; | ||
| 458 | break; | ||
| 459 | } | ||
| 460 | default: | ||
| 461 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 462 | } | ||
| 463 | |||
| 464 | return pc; | ||
| 465 | } | ||
| 466 | |||
| 467 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, | ||
| 468 | Instruction instr, | ||
| 469 | bool is_read, bool is_write) { | ||
| 470 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | ||
| 471 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | ||
| 472 | |||
| 473 | const auto [base_address, index, offset] = | ||
| 474 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); | ||
| 475 | ASSERT_OR_EXECUTE_MSG( | ||
| 476 | base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, | ||
| 477 | "Global memory tracking failed"); | ||
| 478 | |||
| 479 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); | ||
| 480 | |||
| 481 | const GlobalMemoryBase descriptor{index, offset}; | ||
| 482 | const auto& entry = used_global_memory.try_emplace(descriptor).first; | ||
| 483 | auto& usage = entry->second; | ||
| 484 | usage.is_written |= is_write; | ||
| 485 | usage.is_read |= is_read; | ||
| 486 | |||
| 487 | const auto real_address = | ||
| 488 | Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); | ||
| 489 | |||
| 490 | return {real_address, base_address, descriptor}; | ||
| 491 | } | ||
| 492 | |||
| 493 | } // namespace VideoCommon::Shader | ||
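The LD/LDG case above splits a wide global load into 32-bit words (Common::AlignUp(size, 32) / 32) and, for sub-word types, extracts the addressed bytes out of each loaded word. A minimal standalone sketch of that arithmetic in plain C++, independent of the IR node types; WordCount and ExtractUnaligned32 are illustrative names, not part of the codebase:

#include <cstdint>
#include <cstdio>

// Number of 32-bit registers a load/store of `size_in_bits` touches,
// mirroring Common::AlignUp(size, 32) / 32 in the decoder above.
constexpr std::uint32_t WordCount(std::uint32_t size_in_bits) {
    return (size_in_bits + 31) / 32;
}

// Conceptual equivalent of extracting an unaligned sub-word value:
// use the low address bits selected by `mask` as a byte offset into
// the loaded 32-bit word, then keep only `size` bits.
constexpr std::uint32_t ExtractUnaligned32(std::uint32_t loaded_word,
                                           std::uint32_t address,
                                           std::uint32_t mask,
                                           std::uint32_t size) {
    const std::uint32_t bit_offset = (address & mask) * 8; // bytes -> bits
    const std::uint32_t value_mask = size == 32 ? ~0u : ((1u << size) - 1);
    return (loaded_word >> bit_offset) & value_mask;
}

int main() {
    std::printf("u64 load spans %u words\n", WordCount(64));    // 2
    std::printf("u8 at addr 3: 0x%x\n",
                ExtractUnaligned32(0xAABBCCDDu, 3, 0b11, 8));   // 0xaa
}

The size == 32 special case avoids shifting a 32-bit value by 32, which would be undefined behaviour.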
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null | |||
| @@ -1,322 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using std::move; | ||
| 15 | using Tegra::Shader::ConditionCode; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::IpaInterpMode; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::PixelImap; | ||
| 20 | using Tegra::Shader::Register; | ||
| 21 | using Tegra::Shader::SystemVariable; | ||
| 22 | |||
| 23 | using Index = Tegra::Shader::Attribute::Index; | ||
| 24 | |||
| 25 | u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||
| 26 | const Instruction instr = {program_code[pc]}; | ||
| 27 | const auto opcode = OpCode::Decode(instr); | ||
| 28 | |||
| 29 | switch (opcode->get().GetId()) { | ||
| 30 | case OpCode::Id::NOP: { | ||
| 31 | UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); | ||
| 32 | UNIMPLEMENTED_IF(instr.nop.trigger != 0); | ||
| 33 | // With the previous preconditions, this instruction is a no-operation. | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | case OpCode::Id::EXIT: { | ||
| 37 | const ConditionCode cc = instr.flow_condition_code; | ||
| 38 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); | ||
| 39 | |||
| 40 | switch (instr.flow.cond) { | ||
| 41 | case Tegra::Shader::FlowCondition::Always: | ||
| 42 | bb.push_back(Operation(OperationCode::Exit)); | ||
| 43 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 44 | // If this is an unconditional exit then just end processing here, | ||
| 45 | // otherwise we have to account for the possibility of the condition | ||
| 46 | // not being met, so continue processing the next instruction. | ||
| 47 | pc = MAX_PROGRAM_LENGTH - 1; | ||
| 48 | } | ||
| 49 | break; | ||
| 50 | |||
| 51 | case Tegra::Shader::FlowCondition::Fcsm_Tr: | ||
| 52 | // TODO(bunnei): What is this used for? If we assume this condition is not | ||
| 53 | // satisfied, dual vertex shaders in Farming Simulator make more sense | ||
| 54 | UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); | ||
| 55 | break; | ||
| 56 | |||
| 57 | default: | ||
| 58 | UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); | ||
| 59 | } | ||
| 60 | break; | ||
| 61 | } | ||
| 62 | case OpCode::Id::KIL: { | ||
| 63 | UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); | ||
| 64 | |||
| 65 | const ConditionCode cc = instr.flow_condition_code; | ||
| 66 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); | ||
| 67 | |||
| 68 | bb.push_back(Operation(OperationCode::Discard)); | ||
| 69 | break; | ||
| 70 | } | ||
| 71 | case OpCode::Id::S2R: { | ||
| 72 | const Node value = [this, instr] { | ||
| 73 | switch (instr.sys20) { | ||
| 74 | case SystemVariable::LaneId: | ||
| 75 | return Operation(OperationCode::ThreadId); | ||
| 76 | case SystemVariable::InvocationId: | ||
| 77 | return Operation(OperationCode::InvocationId); | ||
| 78 | case SystemVariable::Ydirection: | ||
| 79 | uses_y_negate = true; | ||
| 80 | return Operation(OperationCode::YNegate); | ||
| 81 | case SystemVariable::InvocationInfo: | ||
| 82 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); | ||
| 83 | return Immediate(0x00ff'0000U); | ||
| 84 | case SystemVariable::WscaleFactorXY: | ||
| 85 | UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); | ||
| 86 | return Immediate(0U); | ||
| 87 | case SystemVariable::WscaleFactorZ: | ||
| 88 | UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); | ||
| 89 | return Immediate(0U); | ||
| 90 | case SystemVariable::Tid: { | ||
| 91 | Node val = Immediate(0); | ||
| 92 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); | ||
| 93 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); | ||
| 94 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); | ||
| 95 | return val; | ||
| 96 | } | ||
| 97 | case SystemVariable::TidX: | ||
| 98 | return Operation(OperationCode::LocalInvocationIdX); | ||
| 99 | case SystemVariable::TidY: | ||
| 100 | return Operation(OperationCode::LocalInvocationIdY); | ||
| 101 | case SystemVariable::TidZ: | ||
| 102 | return Operation(OperationCode::LocalInvocationIdZ); | ||
| 103 | case SystemVariable::CtaIdX: | ||
| 104 | return Operation(OperationCode::WorkGroupIdX); | ||
| 105 | case SystemVariable::CtaIdY: | ||
| 106 | return Operation(OperationCode::WorkGroupIdY); | ||
| 107 | case SystemVariable::CtaIdZ: | ||
| 108 | return Operation(OperationCode::WorkGroupIdZ); | ||
| 109 | case SystemVariable::EqMask: | ||
| 110 | case SystemVariable::LtMask: | ||
| 111 | case SystemVariable::LeMask: | ||
| 112 | case SystemVariable::GtMask: | ||
| 113 | case SystemVariable::GeMask: | ||
| 114 | uses_warps = true; | ||
| 115 | switch (instr.sys20) { | ||
| 116 | case SystemVariable::EqMask: | ||
| 117 | return Operation(OperationCode::ThreadEqMask); | ||
| 118 | case SystemVariable::LtMask: | ||
| 119 | return Operation(OperationCode::ThreadLtMask); | ||
| 120 | case SystemVariable::LeMask: | ||
| 121 | return Operation(OperationCode::ThreadLeMask); | ||
| 122 | case SystemVariable::GtMask: | ||
| 123 | return Operation(OperationCode::ThreadGtMask); | ||
| 124 | case SystemVariable::GeMask: | ||
| 125 | return Operation(OperationCode::ThreadGeMask); | ||
| 126 | default: | ||
| 127 | UNREACHABLE(); | ||
| 128 | return Immediate(0u); | ||
| 129 | } | ||
| 130 | default: | ||
| 131 | UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); | ||
| 132 | return Immediate(0u); | ||
| 133 | } | ||
| 134 | }(); | ||
| 135 | SetRegister(bb, instr.gpr0, value); | ||
| 136 | |||
| 137 | break; | ||
| 138 | } | ||
| 139 | case OpCode::Id::BRA: { | ||
| 140 | Node branch; | ||
| 141 | if (instr.bra.constant_buffer == 0) { | ||
| 142 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 143 | branch = Operation(OperationCode::Branch, Immediate(target)); | ||
| 144 | } else { | ||
| 145 | const u32 target = pc + 1; | ||
| 146 | const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||
| 147 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 148 | PRECISE, op_a, Immediate(3)); | ||
| 149 | const Node operand = | ||
| 150 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 151 | branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 152 | } | ||
| 153 | |||
| 154 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 155 | if (cc != Tegra::Shader::ConditionCode::T) { | ||
| 156 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 157 | } else { | ||
| 158 | bb.push_back(branch); | ||
| 159 | } | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | case OpCode::Id::BRX: { | ||
| 163 | Node operand; | ||
| 164 | if (instr.brx.constant_buffer != 0) { | ||
| 165 | const s32 target = pc + 1; | ||
| 166 | const Node index = GetRegister(instr.gpr8); | ||
| 167 | const Node op_a = | ||
| 168 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 169 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 170 | PRECISE, op_a, Immediate(3)); | ||
| 171 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 172 | } else { | ||
| 173 | const s32 target = pc + instr.brx.GetBranchExtend(); | ||
| 174 | const Node op_a = GetRegister(instr.gpr8); | ||
| 175 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 176 | PRECISE, op_a, Immediate(3)); | ||
| 177 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 178 | } | ||
| 179 | const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 180 | |||
| 181 | const ConditionCode cc = instr.flow_condition_code; | ||
| 182 | if (cc != ConditionCode::T) { | ||
| 183 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 184 | } else { | ||
| 185 | bb.push_back(branch); | ||
| 186 | } | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | case OpCode::Id::SSY: { | ||
| 190 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 191 | "Constant buffer flow is not supported"); | ||
| 192 | |||
| 193 | if (disable_flow_stack) { | ||
| 194 | break; | ||
| 195 | } | ||
| 196 | |||
| 197 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. | ||
| 198 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 199 | bb.push_back( | ||
| 200 | Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | case OpCode::Id::PBK: { | ||
| 204 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 205 | "Constant buffer PBK is not supported"); | ||
| 206 | |||
| 207 | if (disable_flow_stack) { | ||
| 208 | break; | ||
| 209 | } | ||
| 210 | |||
| 211 | // PBK pushes to a stack the address where BRK will jump to. | ||
| 212 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 213 | bb.push_back( | ||
| 214 | Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); | ||
| 215 | break; | ||
| 216 | } | ||
| 217 | case OpCode::Id::SYNC: { | ||
| 218 | const ConditionCode cc = instr.flow_condition_code; | ||
| 219 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); | ||
| 220 | |||
| 221 | if (decompiled) { | ||
| 222 | break; | ||
| 223 | } | ||
| 224 | |||
| 225 | // The SYNC opcode jumps to the address previously set by the SSY opcode | ||
| 226 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | case OpCode::Id::BRK: { | ||
| 230 | const ConditionCode cc = instr.flow_condition_code; | ||
| 231 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); | ||
| 232 | if (decompiled) { | ||
| 233 | break; | ||
| 234 | } | ||
| 235 | |||
| 236 | // The BRK opcode jumps to the address previously set by the PBK opcode | ||
| 237 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | case OpCode::Id::IPA: { | ||
| 241 | const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; | ||
| 242 | const auto attribute = instr.attribute.fmt28; | ||
| 243 | const Index index = attribute.index; | ||
| 244 | |||
| 245 | Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) | ||
| 246 | : GetInputAttribute(index, attribute.element); | ||
| 247 | |||
| 248 | // Code taken from Ryujinx. | ||
| 249 | if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { | ||
| 250 | const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); | ||
| 251 | if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { | ||
| 252 | Node position_w = GetInputAttribute(Index::Position, 3); | ||
| 253 | value = Operation(OperationCode::FMul, move(value), move(position_w)); | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 257 | if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { | ||
| 258 | value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); | ||
| 259 | } | ||
| 260 | |||
| 261 | value = GetSaturatedFloat(move(value), instr.ipa.saturate); | ||
| 262 | |||
| 263 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 264 | break; | ||
| 265 | } | ||
| 266 | case OpCode::Id::OUT_R: { | ||
| 267 | UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, | ||
| 268 | "Stream buffer is not supported"); | ||
| 269 | |||
| 270 | if (instr.out.emit) { | ||
| 271 | // gpr0 is used to store the next address and gpr8 contains the address to emit. | ||
| 272 | // Hardware uses pointers here but we just ignore them. | ||
| 273 | bb.push_back(Operation(OperationCode::EmitVertex)); | ||
| 274 | SetRegister(bb, instr.gpr0, Immediate(0)); | ||
| 275 | } | ||
| 276 | if (instr.out.cut) { | ||
| 277 | bb.push_back(Operation(OperationCode::EndPrimitive)); | ||
| 278 | } | ||
| 279 | break; | ||
| 280 | } | ||
| 281 | case OpCode::Id::ISBERD: { | ||
| 282 | UNIMPLEMENTED_IF(instr.isberd.o != 0); | ||
| 283 | UNIMPLEMENTED_IF(instr.isberd.skew != 0); | ||
| 284 | UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); | ||
| 285 | UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); | ||
| 286 | LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); | ||
| 287 | SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); | ||
| 288 | break; | ||
| 289 | } | ||
| 290 | case OpCode::Id::BAR: { | ||
| 291 | UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); | ||
| 292 | bb.push_back(Operation(OperationCode::Barrier)); | ||
| 293 | break; | ||
| 294 | } | ||
| 295 | case OpCode::Id::MEMBAR: { | ||
| 296 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); | ||
| 297 | const OperationCode type = [instr] { | ||
| 298 | switch (instr.membar.type) { | ||
| 299 | case Tegra::Shader::MembarType::CTA: | ||
| 300 | return OperationCode::MemoryBarrierGroup; | ||
| 301 | case Tegra::Shader::MembarType::GL: | ||
| 302 | return OperationCode::MemoryBarrierGlobal; | ||
| 303 | default: | ||
| 304 | UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); | ||
| 305 | return OperationCode::MemoryBarrierGlobal; | ||
| 306 | } | ||
| 307 | }(); | ||
| 308 | bb.push_back(Operation(type)); | ||
| 309 | break; | ||
| 310 | } | ||
| 311 | case OpCode::Id::DEPBAR: { | ||
| 312 | LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); | ||
| 313 | break; | ||
| 314 | } | ||
| 315 | default: | ||
| 316 | UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); | ||
| 317 | } | ||
| 318 | |||
| 319 | return pc; | ||
| 320 | } | ||
| 321 | |||
| 322 | } // namespace VideoCommon::Shader | ||
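The S2R Tid case above composes the packed thread ID with three bitfield inserts: x at bit 0 (9 bits), y at bit 16 (9 bits), z at bit 26 (5 bits). A self-contained sketch of the same packing in plain C++; InsertBits and PackTid are illustrative names:

#include <cstdint>
#include <cstdio>

// Insert `count` bits of `value` into `base` starting at `offset`,
// the scalar analogue of the decoder's BitfieldInsert helper.
constexpr std::uint32_t InsertBits(std::uint32_t base, std::uint32_t value,
                                   std::uint32_t offset, std::uint32_t count) {
    const std::uint32_t mask = ((1u << count) - 1) << offset;
    return (base & ~mask) | ((value << offset) & mask);
}

// Pack the three local invocation IDs with the layout used above.
constexpr std::uint32_t PackTid(std::uint32_t x, std::uint32_t y, std::uint32_t z) {
    std::uint32_t val = 0;
    val = InsertBits(val, x, 0, 9);
    val = InsertBits(val, y, 16, 9);
    val = InsertBits(val, z, 26, 5);
    return val;
}

int main() {
    std::printf("tid = 0x%08x\n", PackTid(5, 3, 1)); // 0x04030005
}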
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | switch (opcode->get().GetId()) { | ||
| 22 | case OpCode::Id::PSETP: { | ||
| 23 | const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||
| 24 | const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 25 | |||
| 26 | // We can't use the constant predicate as destination. | ||
| 27 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 28 | |||
| 29 | const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||
| 30 | |||
| 31 | const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); | ||
| 32 | const Node predicate = Operation(combiner, op_a, op_b); | ||
| 33 | |||
| 34 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 35 | SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); | ||
| 36 | |||
| 37 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 38 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if | ||
| 39 | // enabled | ||
| 40 | SetPredicate(bb, instr.psetp.pred0, | ||
| 41 | Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), | ||
| 42 | second_pred)); | ||
| 43 | } | ||
| 44 | break; | ||
| 45 | } | ||
| 46 | case OpCode::Id::CSETP: { | ||
| 47 | const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 48 | const Node condition_code = GetConditionCode(instr.csetp.cc); | ||
| 49 | |||
| 50 | const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 51 | |||
| 52 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 53 | SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); | ||
| 54 | } | ||
| 55 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 56 | const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); | ||
| 57 | SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); | ||
| 58 | } | ||
| 59 | break; | ||
| 60 | } | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); | ||
| 63 | } | ||
| 64 | |||
| 65 | return pc; | ||
| 66 | } | ||
| 67 | |||
| 68 | } // namespace VideoCommon::Shader | ||
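PSETP above evaluates one inner combination and fans it out to two destination predicates: the primary receives (a OP b) OP c and the secondary, when enabled, receives !(a OP b) OP c, all through the same combiner. A plain-bool model of that behaviour (Psetp is an illustrative name, not the decoder's API):

#include <cstdio>
#include <functional>

struct PsetpResult {
    bool primary;   // what SetPredicate writes to pred3
    bool secondary; // what SetPredicate writes to pred0, when enabled
};

// Model of the PSETP dataflow: a single combiner applied twice,
// once to the inner pair and once per destination.
PsetpResult Psetp(bool a, bool b, bool c,
                  const std::function<bool(bool, bool)>& combiner) {
    const bool inner = combiner(a, b);
    return {combiner(inner, c), combiner(!inner, c)};
}

int main() {
    const auto logical_and = [](bool x, bool y) { return x && y; };
    const auto [p3, p0] = Psetp(true, true, true, logical_and);
    std::printf("primary=%d secondary=%d\n", p3, p0); // 1 0
}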
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null | |||
| @@ -1,46 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 20 | "Condition codes generation in PSET is not implemented"); | ||
| 21 | |||
| 22 | const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 23 | const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 24 | const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); | ||
| 25 | |||
| 26 | const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 27 | |||
| 28 | const OperationCode combiner = GetPredicateCombiner(instr.pset.op); | ||
| 29 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 30 | |||
| 31 | const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); | ||
| 32 | const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 33 | const Node value = | ||
| 34 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 35 | |||
| 36 | if (instr.pset.bf) { | ||
| 37 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 38 | } else { | ||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | } | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | |||
| 43 | return pc; | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace VideoCommon::Shader | ||
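PSET above materializes a boolean into a register in one of two encodings, selected by the BF bit: float 1.0/0.0 or integer all-ones/zero. A small sketch of the resulting bit patterns (PsetValue is an illustrative name):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Returns the raw 32-bit pattern a register would hold after PSET,
// mirroring the true_value/false_value Select above.
std::uint32_t PsetValue(bool predicate, bool bf) {
    if (bf) {
        const float f = predicate ? 1.0f : 0.0f;
        std::uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits)); // bit-cast the float result
        return bits;
    }
    return predicate ? 0xffffffffu : 0u;
}

int main() {
    std::printf("bf: 0x%08x, int: 0x%08x\n",
                PsetValue(true, true),   // 0x3f800000 (1.0f)
                PsetValue(true, false)); // 0xffffffff
}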
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null | |||
| @@ -1,86 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using std::move; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | constexpr u64 NUM_CONDITION_CODES = 4; | ||
| 21 | constexpr u64 NUM_PREDICATES = 7; | ||
| 22 | } // namespace | ||
| 23 | |||
| 24 | u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 25 | const Instruction instr = {program_code[pc]}; | ||
| 26 | const auto opcode = OpCode::Decode(instr); | ||
| 27 | |||
| 28 | Node apply_mask = [this, opcode, instr] { | ||
| 29 | switch (opcode->get().GetId()) { | ||
| 30 | case OpCode::Id::R2P_IMM: | ||
| 31 | case OpCode::Id::P2R_IMM: | ||
| 32 | return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); | ||
| 33 | default: | ||
| 34 | UNREACHABLE(); | ||
| 35 | return Immediate(0); | ||
| 36 | } | ||
| 37 | }(); | ||
| 38 | |||
| 39 | const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; | ||
| 40 | |||
| 41 | const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; | ||
| 42 | const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; | ||
| 43 | const auto get_entry = [this, cc](u64 entry) { | ||
| 44 | return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry); | ||
| 45 | }; | ||
| 46 | |||
| 47 | switch (opcode->get().GetId()) { | ||
| 48 | case OpCode::Id::R2P_IMM: { | ||
| 49 | Node mask = GetRegister(instr.gpr8); | ||
| 50 | |||
| 51 | for (u64 entry = 0; entry < num_entries; ++entry) { | ||
| 52 | const u32 shift = static_cast<u32>(entry); | ||
| 53 | |||
| 54 | Node apply = BitfieldExtract(apply_mask, shift, 1); | ||
| 55 | Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); | ||
| 56 | |||
| 57 | Node compare = BitfieldExtract(mask, offset + shift, 1); | ||
| 58 | Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); | ||
| 59 | |||
| 60 | Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); | ||
| 61 | bb.push_back(Conditional(condition, {move(code)})); | ||
| 62 | } | ||
| 63 | break; | ||
| 64 | } | ||
| 65 | case OpCode::Id::P2R_IMM: { | ||
| 66 | Node value = Immediate(0); | ||
| 67 | for (u64 entry = 0; entry < num_entries; ++entry) { | ||
| 68 | Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), | ||
| 69 | Immediate(0)); | ||
| 70 | value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); | ||
| 71 | } | ||
| 72 | value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); | ||
| 73 | value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); | ||
| 74 | |||
| 75 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 76 | break; | ||
| 77 | } | ||
| 78 | default: | ||
| 79 | UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); | ||
| 80 | break; | ||
| 81 | } | ||
| 82 | |||
| 83 | return pc; | ||
| 84 | } | ||
| 85 | |||
| 86 | } // namespace VideoCommon::Shader | ||
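P2R_IMM above gathers the predicate bits into an integer, masks them with the immediate, and inserts the result into one byte of the source register (offset = byte * 8, width 8). A standalone model of that path; P2rImm is an illustrative name and the seven-entry limit mirrors NUM_PREDICATES:

#include <cstdint>
#include <cstdio>

// Gather up to seven predicate bits, apply the immediate mask, and
// insert the result into the byte of `gpr8` selected by `byte`.
constexpr std::uint32_t P2rImm(const bool predicates[7], std::uint32_t immediate_mask,
                               std::uint32_t gpr8, std::uint32_t byte) {
    std::uint32_t value = 0;
    for (std::uint32_t entry = 0; entry < 7; ++entry) {
        value |= predicates[entry] ? (1u << entry) : 0u;
    }
    value &= immediate_mask;
    const std::uint32_t offset = byte * 8;
    const std::uint32_t field_mask = 0xffu << offset;
    return (gpr8 & ~field_mask) | ((value << offset) & field_mask);
}

int main() {
    const bool preds[7] = {true, false, true, false, false, false, false};
    // The mask keeps P0 and P2; they land in byte 1 of the register.
    std::printf("0x%08x\n", P2rImm(preds, 0b101, 0x11223344u, 1)); // 0x11220544
}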
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null | |||
| @@ -1,153 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using std::move; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::ShfType; | ||
| 17 | using Tegra::Shader::ShfXmode; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | Node IsFull(Node shift) { | ||
| 22 | return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); | ||
| 23 | } | ||
| 24 | |||
| 25 | Node Shift(OperationCode opcode, Node value, Node shift) { | ||
| 26 | Node shifted = Operation(opcode, move(value), shift); | ||
| 27 | return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); | ||
| 28 | } | ||
| 29 | |||
| 30 | Node ClampShift(Node shift, s32 size = 32) { | ||
| 31 | shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); | ||
| 32 | return Operation(OperationCode::IMin, move(shift), Immediate(size)); | ||
| 33 | } | ||
| 34 | |||
| 35 | Node WrapShift(Node shift, s32 size = 32) { | ||
| 36 | return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); | ||
| 37 | } | ||
| 38 | |||
| 39 | Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { | ||
| 40 | // These values are used when the shift value is less than 32 | ||
| 41 | Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); | ||
| 42 | Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); | ||
| 43 | Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); | ||
| 44 | |||
| 45 | if (type == ShfType::Bits32) { | ||
| 46 | // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits | ||
| 47 | return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); | ||
| 48 | } | ||
| 49 | |||
| 50 | // And these when the shift is 32 or larger | ||
| 51 | const bool is_signed = type == ShfType::S64; | ||
| 52 | const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); | ||
| 53 | Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); | ||
| 54 | Node greater = Shift(opcode, high, move(reduced)); | ||
| 55 | |||
| 56 | Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); | ||
| 57 | Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); | ||
| 58 | |||
| 59 | Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); | ||
| 60 | return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); | ||
| 61 | } | ||
| 62 | |||
| 63 | Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { | ||
| 64 | // These values are used when the shift value is less than 32 | ||
| 65 | Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); | ||
| 66 | Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); | ||
| 67 | Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); | ||
| 68 | |||
| 69 | if (type == ShfType::Bits32) { | ||
| 70 | // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits | ||
| 71 | return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); | ||
| 72 | } | ||
| 73 | |||
| 74 | // And these when the shift is 32 or larger | ||
| 75 | Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); | ||
| 76 | Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); | ||
| 77 | |||
| 78 | Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); | ||
| 79 | Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); | ||
| 80 | |||
| 81 | Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); | ||
| 82 | return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); | ||
| 83 | } | ||
| 84 | |||
| 85 | } // Anonymous namespace | ||
| 86 | |||
| 87 | u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { | ||
| 88 | const Instruction instr = {program_code[pc]}; | ||
| 89 | const auto opcode = OpCode::Decode(instr); | ||
| 90 | |||
| 91 | Node op_a = GetRegister(instr.gpr8); | ||
| 92 | Node op_b = [this, instr] { | ||
| 93 | if (instr.is_b_imm) { | ||
| 94 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 95 | } else if (instr.is_b_gpr) { | ||
| 96 | return GetRegister(instr.gpr20); | ||
| 97 | } else { | ||
| 98 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 99 | } | ||
| 100 | }(); | ||
| 101 | |||
| 102 | switch (const auto opid = opcode->get().GetId(); opid) { | ||
| 103 | case OpCode::Id::SHR_C: | ||
| 104 | case OpCode::Id::SHR_R: | ||
| 105 | case OpCode::Id::SHR_IMM: { | ||
| 106 | op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); | ||
| 107 | |||
| 108 | Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, | ||
| 109 | move(op_a), move(op_b)); | ||
| 110 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 111 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | case OpCode::Id::SHL_C: | ||
| 115 | case OpCode::Id::SHL_R: | ||
| 116 | case OpCode::Id::SHL_IMM: { | ||
| 117 | Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); | ||
| 118 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 119 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::SHF_RIGHT_R: | ||
| 123 | case OpCode::Id::SHF_RIGHT_IMM: | ||
| 124 | case OpCode::Id::SHF_LEFT_R: | ||
| 125 | case OpCode::Id::SHF_LEFT_IMM: { | ||
| 126 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 127 | UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", | ||
| 128 | instr.shf.xmode.Value()); | ||
| 129 | |||
| 130 | if (instr.is_b_imm) { | ||
| 131 | op_b = Immediate(static_cast<u32>(instr.shf.immediate)); | ||
| 132 | } | ||
| 133 | const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; | ||
| 134 | Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); | ||
| 135 | |||
| 136 | Node negated_shift = Operation(OperationCode::INegate, shift); | ||
| 137 | Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); | ||
| 138 | |||
| 139 | const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; | ||
| 140 | Node value = (is_right ? ShiftRight : ShiftLeft)( | ||
| 141 | move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); | ||
| 142 | |||
| 143 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | default: | ||
| 147 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 148 | } | ||
| 149 | |||
| 150 | return pc; | ||
| 151 | } | ||
| 152 | |||
| 153 | } // namespace VideoCommon::Shader | ||
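The SHF path above builds a 64-bit funnel shift out of 32-bit operations plus a chain of selects: shifts below 32 merge bits from both halves, shifts of 32 or more act on the high half alone, and a shift of zero returns the high register, exactly as the final two Selects in ShiftRight encode. A scalar model of the right-shift U64 case (FunnelShiftRightU64 is an illustrative name):

#include <cstdint>
#include <cstdio>

// Reproduces the Select chain of ShiftRight for ShfType::U64,
// assuming `shift` was already wrapped/clamped to 0..64.
std::uint32_t FunnelShiftRightU64(std::uint32_t low, std::uint32_t high,
                                  std::uint32_t shift) {
    if (shift == 0) {
        return high; // the is_zero Select picks the high register
    }
    if (shift < 32) {
        // Funnel bits from `high` into the vacated top of `low`.
        return (low >> shift) | (high << (32 - shift));
    }
    // 32 <= shift <= 64: only the high half contributes.
    return shift >= 64 ? 0 : (high >> (shift - 32));
}

int main() {
    // Low result word of 0x11223344'55667788 >> 8:
    std::printf("0x%08x\n", FunnelShiftRightU64(0x55667788u, 0x11223344u, 8));
    // prints 0x44556677
}

Note that the zero-shift result mirrors the decoder's Select chain (high), not the mathematical identity (low), so the model intentionally matches the code rather than the abstract operation.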
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null | |||
| @@ -1,935 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Instruction; | ||
| 21 | using Tegra::Shader::OpCode; | ||
| 22 | using Tegra::Shader::Register; | ||
| 23 | using Tegra::Shader::TextureMiscMode; | ||
| 24 | using Tegra::Shader::TextureProcessMode; | ||
| 25 | using Tegra::Shader::TextureType; | ||
| 26 | |||
| 27 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 28 | switch (texture_type) { | ||
| 29 | case TextureType::Texture1D: | ||
| 30 | return 1; | ||
| 31 | case TextureType::Texture2D: | ||
| 32 | return 2; | ||
| 33 | case TextureType::Texture3D: | ||
| 34 | case TextureType::TextureCube: | ||
| 35 | return 3; | ||
| 36 | default: | ||
| 37 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||
| 43 | const Instruction instr = {program_code[pc]}; | ||
| 44 | const auto opcode = OpCode::Decode(instr); | ||
| 45 | bool is_bindless = false; | ||
| 46 | switch (opcode->get().GetId()) { | ||
| 47 | case OpCode::Id::TEX: { | ||
| 48 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 49 | const bool is_array = instr.tex.array != 0; | ||
| 50 | const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 51 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 52 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 53 | WriteTexInstructionFloat( | ||
| 54 | bb, instr, | ||
| 55 | GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); | ||
| 56 | break; | ||
| 57 | } | ||
| 58 | case OpCode::Id::TEX_B: { | ||
| 59 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 60 | "AOFFI is not implemented"); | ||
| 61 | |||
| 62 | const TextureType texture_type{instr.tex_b.texture_type}; | ||
| 63 | const bool is_array = instr.tex_b.array != 0; | ||
| 64 | const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 65 | const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); | ||
| 66 | const auto process_mode = instr.tex_b.GetTextureProcessMode(); | ||
| 67 | WriteTexInstructionFloat(bb, instr, | ||
| 68 | GetTexCode(instr, texture_type, process_mode, depth_compare, | ||
| 69 | is_array, is_aoffi, {instr.gpr20})); | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | case OpCode::Id::TEXS: { | ||
| 73 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 74 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 75 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 76 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 77 | |||
| 78 | const Node4 components = | ||
| 79 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 80 | |||
| 81 | if (instr.texs.fp32_flag) { | ||
| 82 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 83 | } else { | ||
| 84 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 85 | } | ||
| 86 | break; | ||
| 87 | } | ||
| 88 | case OpCode::Id::TLD4_B: { | ||
| 89 | is_bindless = true; | ||
| 90 | [[fallthrough]]; | ||
| 91 | } | ||
| 92 | case OpCode::Id::TLD4: { | ||
| 93 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 94 | "NDV is not implemented"); | ||
| 95 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 96 | const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) | ||
| 97 | : instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 98 | const bool is_array = instr.tld4.array != 0; | ||
| 99 | const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) | ||
| 100 | : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 101 | const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) | ||
| 102 | : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); | ||
| 103 | WriteTexInstructionFloat(bb, instr, | ||
| 104 | GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, | ||
| 105 | is_ptp, is_bindless)); | ||
| 106 | break; | ||
| 107 | } | ||
| 108 | case OpCode::Id::TLD4S: { | ||
| 109 | constexpr std::size_t num_coords = 2; | ||
| 110 | const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 111 | const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 112 | const Node op_a = GetRegister(instr.gpr8); | ||
| 113 | const Node op_b = GetRegister(instr.gpr20); | ||
| 114 | |||
| 115 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 116 | std::vector<Node> coords; | ||
| 117 | std::vector<Node> aoffi; | ||
| 118 | Node depth_compare; | ||
| 119 | if (is_depth_compare) { | ||
| 120 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 121 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 122 | coords.push_back(op_a); | ||
| 123 | coords.push_back(op_y); | ||
| 124 | if (is_aoffi) { | ||
| 125 | aoffi = GetAoffiCoordinates(op_b, num_coords, true); | ||
| 126 | depth_compare = GetRegister(instr.gpr20.Value() + 1); | ||
| 127 | } else { | ||
| 128 | depth_compare = op_b; | ||
| 129 | } | ||
| 130 | } else { | ||
| 131 | // There's no depth compare | ||
| 132 | coords.push_back(op_a); | ||
| 133 | if (is_aoffi) { | ||
| 134 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 135 | aoffi = GetAoffiCoordinates(op_b, num_coords, true); | ||
| 136 | } else { | ||
| 137 | coords.push_back(op_b); | ||
| 138 | } | ||
| 139 | } | ||
| 140 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | ||
| 141 | |||
| 142 | SamplerInfo info; | ||
| 143 | info.is_shadow = is_depth_compare; | ||
| 144 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); | ||
| 145 | |||
| 146 | Node4 values; | ||
| 147 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 148 | MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, | ||
| 149 | {}, {}, component, element, {}}; | ||
| 150 | values[element] = Operation(OperationCode::TextureGather, meta, coords); | ||
| 151 | } | ||
| 152 | |||
| 153 | if (instr.tld4s.fp16_flag) { | ||
| 154 | WriteTexsInstructionHalfFloat(bb, instr, values, true); | ||
| 155 | } else { | ||
| 156 | WriteTexsInstructionFloat(bb, instr, values, true); | ||
| 157 | } | ||
| 158 | break; | ||
| 159 | } | ||
| 160 | case OpCode::Id::TXD_B: | ||
| 161 | is_bindless = true; | ||
| 162 | [[fallthrough]]; | ||
| 163 | case OpCode::Id::TXD: { | ||
| 164 | UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 165 | "AOFFI is not implemented"); | ||
| 166 | |||
| 167 | const bool is_array = instr.txd.is_array != 0; | ||
| 168 | const auto derivate_reg = instr.gpr20.Value(); | ||
| 169 | const auto texture_type = instr.txd.texture_type.Value(); | ||
| 170 | const auto coord_count = GetCoordCount(texture_type); | ||
| 171 | u64 base_reg = instr.gpr8.Value(); | ||
| 172 | Node index_var; | ||
| 173 | SamplerInfo info; | ||
| 174 | info.type = texture_type; | ||
| 175 | info.is_array = is_array; | ||
| 176 | const std::optional<SamplerEntry> sampler = | ||
| 177 | is_bindless ? GetBindlessSampler(base_reg, info, index_var) | ||
| 178 | : GetSampler(instr.sampler, info); | ||
| 179 | Node4 values; | ||
| 180 | if (!sampler) { | ||
| 181 | std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); | ||
| 182 | WriteTexInstructionFloat(bb, instr, values); | ||
| 183 | break; | ||
| 184 | } | ||
| 185 | |||
| 186 | if (is_bindless) { | ||
| 187 | base_reg++; | ||
| 188 | } | ||
| 189 | |||
| 190 | std::vector<Node> coords; | ||
| 191 | std::vector<Node> derivates; | ||
| 192 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 193 | coords.push_back(GetRegister(base_reg + i)); | ||
| 194 | const std::size_t derivate = i * 2; | ||
| 195 | derivates.push_back(GetRegister(derivate_reg + derivate)); | ||
| 196 | derivates.push_back(GetRegister(derivate_reg + derivate + 1)); | ||
| 197 | } | ||
| 198 | |||
| 199 | Node array_node = {}; | ||
| 200 | if (is_array) { | ||
| 201 | const Node info_reg = GetRegister(base_reg + coord_count); | ||
| 202 | array_node = BitfieldExtract(info_reg, 0, 16); | ||
| 203 | } | ||
| 204 | |||
| 205 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 206 | MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, | ||
| 207 | {}, {}, {}, element, index_var}; | ||
| 208 | values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); | ||
| 209 | } | ||
| 210 | |||
| 211 | WriteTexInstructionFloat(bb, instr, values); | ||
| 212 | |||
| 213 | break; | ||
| 214 | } | ||
| 215 | case OpCode::Id::TXQ_B: | ||
| 216 | is_bindless = true; | ||
| 217 | [[fallthrough]]; | ||
| 218 | case OpCode::Id::TXQ: { | ||
| 219 | Node index_var; | ||
| 220 | const std::optional<SamplerEntry> sampler = | ||
| 221 | is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) | ||
| 222 | : GetSampler(instr.sampler, {}); | ||
| 223 | |||
| 224 | if (!sampler) { | ||
| 225 | u32 indexer = 0; | ||
| 226 | for (u32 element = 0; element < 4; ++element) { | ||
| 227 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 228 | continue; | ||
| 229 | } | ||
| 230 | const Node value = Immediate(0); | ||
| 231 | SetTemporary(bb, indexer++, value); | ||
| 232 | } | ||
| 233 | for (u32 i = 0; i < indexer; ++i) { | ||
| 234 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 235 | } | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | |||
| 239 | u32 indexer = 0; | ||
| 240 | switch (instr.txq.query_type) { | ||
| 241 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 242 | for (u32 element = 0; element < 4; ++element) { | ||
| 243 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 244 | continue; | ||
| 245 | } | ||
| 246 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; | ||
| 247 | const Node value = | ||
| 248 | Operation(OperationCode::TextureQueryDimensions, meta, | ||
| 249 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | ||
| 250 | SetTemporary(bb, indexer++, value); | ||
| 251 | } | ||
| 252 | for (u32 i = 0; i < indexer; ++i) { | ||
| 253 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 254 | } | ||
| 255 | break; | ||
| 256 | } | ||
| 257 | default: | ||
| 258 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); | ||
| 259 | } | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | case OpCode::Id::TMML_B: | ||
| 263 | is_bindless = true; | ||
| 264 | [[fallthrough]]; | ||
| 265 | case OpCode::Id::TMML: { | ||
| 266 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 267 | "NDV is not implemented"); | ||
| 268 | |||
| 269 | const auto texture_type = instr.tmml.texture_type.Value(); | ||
| 270 | const bool is_array = instr.tmml.array != 0; | ||
| 271 | SamplerInfo info; | ||
| 272 | info.type = texture_type; | ||
| 273 | info.is_array = is_array; | ||
| 274 | Node index_var; | ||
| 275 | const std::optional<SamplerEntry> sampler = | ||
| 276 | is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) | ||
| 277 | : GetSampler(instr.sampler, info); | ||
| 278 | |||
| 279 | if (!sampler) { | ||
| 280 | u32 indexer = 0; | ||
| 281 | for (u32 element = 0; element < 2; ++element) { | ||
| 282 | if (!instr.tmml.IsComponentEnabled(element)) { | ||
| 283 | continue; | ||
| 284 | } | ||
| 285 | const Node value = Immediate(0); | ||
| 286 | SetTemporary(bb, indexer++, value); | ||
| 287 | } | ||
| 288 | for (u32 i = 0; i < indexer; ++i) { | ||
| 289 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 290 | } | ||
| 291 | break; | ||
| 292 | } | ||
| 293 | |||
| 294 | const u64 base_index = is_array ? 1 : 0; | ||
| 295 | const u64 num_components = [texture_type] { | ||
| 296 | switch (texture_type) { | ||
| 297 | case TextureType::Texture1D: | ||
| 298 | return 1; | ||
| 299 | case TextureType::Texture2D: | ||
| 300 | return 2; | ||
| 301 | case TextureType::TextureCube: | ||
| 302 | return 3; | ||
| 303 | default: | ||
| 304 | UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); | ||
| 305 | return 2; | ||
| 306 | } | ||
| 307 | }(); | ||
| 308 | // TODO: What's the array component used for? | ||
| 309 | |||
| 310 | std::vector<Node> coords; | ||
| 311 | coords.reserve(num_components); | ||
| 312 | for (u64 component = 0; component < num_components; ++component) { | ||
| 313 | coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); | ||
| 314 | } | ||
| 315 | |||
| 316 | u32 indexer = 0; | ||
| 317 | for (u32 element = 0; element < 2; ++element) { | ||
| 318 | if (!instr.tmml.IsComponentEnabled(element)) { | ||
| 319 | continue; | ||
| 320 | } | ||
| 321 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; | ||
| 322 | Node value = Operation(OperationCode::TextureQueryLod, meta, coords); | ||
| 323 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 324 | } | ||
| 325 | for (u32 i = 0; i < indexer; ++i) { | ||
| 326 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 327 | } | ||
| 328 | break; | ||
| 329 | } | ||
| 330 | case OpCode::Id::TLD: { | ||
| 331 | UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); | ||
| 332 | UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); | ||
| 333 | UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); | ||
| 334 | |||
| 335 | WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); | ||
| 336 | break; | ||
| 337 | } | ||
| 338 | case OpCode::Id::TLDS: { | ||
| 339 | const TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 340 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 341 | |||
| 342 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 343 | "AOFFI is not implemented"); | ||
| 344 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 345 | |||
| 346 | const Node4 components = GetTldsCode(instr, texture_type, is_array); | ||
| 347 | |||
| 348 | if (instr.tlds.fp32_flag) { | ||
| 349 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 350 | } else { | ||
| 351 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 352 | } | ||
| 353 | break; | ||
| 354 | } | ||
| 355 | default: | ||
| 356 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 357 | } | ||
| 358 | |||
| 359 | return pc; | ||
| 360 | } | ||
| 361 | |||
| 362 | ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( | ||
| 363 | SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { | ||
| 364 | if (info.IsComplete()) { | ||
| 365 | return info; | ||
| 366 | } | ||
| 367 | if (!sampler) { | ||
| 368 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | ||
| 369 | info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); | ||
| 370 | info.is_array = info.is_array.value_or(false); | ||
| 371 | info.is_shadow = info.is_shadow.value_or(false); | ||
| 372 | info.is_buffer = info.is_buffer.value_or(false); | ||
| 373 | return info; | ||
| 374 | } | ||
| 375 | info.type = info.type.value_or(sampler->texture_type); | ||
| 376 | info.is_array = info.is_array.value_or(sampler->is_array != 0); | ||
| 377 | info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); | ||
| 378 | info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); | ||
| 379 | return info; | ||
| 380 | } | ||
| 381 | |||
| 382 | std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, | ||
| 383 | SamplerInfo sampler_info) { | ||
| 384 | const u32 offset = static_cast<u32>(sampler.index.Value()); | ||
| 385 | const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); | ||
| 386 | |||
| 387 | // If this sampler has already been used, return the existing mapping. | ||
| 388 | const auto it = | ||
| 389 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 390 | [offset](const SamplerEntry& entry) { return entry.offset == offset; }); | ||
| 391 | if (it != used_samplers.end()) { | ||
| 392 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 393 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | ||
| 394 | return *it; | ||
| 395 | } | ||
| 396 | |||
| 397 | // Otherwise create a new mapping for this sampler | ||
| 398 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 399 | return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, | ||
| 400 | *info.is_shadow, *info.is_buffer, false); | ||
| 401 | } | ||
| 402 | |||
| 403 | std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | ||
| 404 | SamplerInfo info, Node& index_var) { | ||
| 405 | const Node sampler_register = GetRegister(reg); | ||
| 406 | const auto [base_node, tracked_sampler_info] = | ||
| 407 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); | ||
| 408 | if (!base_node) { | ||
| 409 | UNREACHABLE(); | ||
| 410 | return std::nullopt; | ||
| 411 | } | ||
| 412 | |||
| 413 | if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { | ||
| 414 | const u32 buffer = sampler_info->index; | ||
| 415 | const u32 offset = sampler_info->offset; | ||
| 416 | info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); | ||
| 417 | |||
| 418 | // If this sampler has already been used, return the existing mapping. | ||
| 419 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 420 | [buffer, offset](const SamplerEntry& entry) { | ||
| 421 | return entry.buffer == buffer && entry.offset == offset; | ||
| 422 | }); | ||
| 423 | if (it != used_samplers.end()) { | ||
| 424 | ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 425 | it->is_shadow == info.is_shadow); | ||
| 426 | return *it; | ||
| 427 | } | ||
| 428 | |||
| 429 | // Otherwise create a new mapping for this sampler | ||
| 430 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 431 | return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, | ||
| 432 | *info.is_shadow, *info.is_buffer, false); | ||
| 433 | } | ||
| 434 | if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { | ||
| 435 | const std::pair indices = sampler_info->indices; | ||
| 436 | const std::pair offsets = sampler_info->offsets; | ||
| 437 | info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); | ||
| 438 | |||
| 439 | // Try to use an already created sampler if it exists | ||
| 440 | const auto it = | ||
| 441 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 442 | [indices, offsets](const SamplerEntry& entry) { | ||
| 443 | return offsets == std::pair{entry.offset, entry.secondary_offset} && | ||
| 444 | indices == std::pair{entry.buffer, entry.secondary_buffer}; | ||
| 445 | }); | ||
| 446 | if (it != used_samplers.end()) { | ||
| 447 | ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && | ||
| 448 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | ||
| 449 | return *it; | ||
| 450 | } | ||
| 451 | |||
| 452 | // Otherwise create a new mapping for this sampler | ||
| 453 | const u32 next_index = static_cast<u32>(used_samplers.size()); | ||
| 454 | return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, | ||
| 455 | *info.is_shadow, *info.is_buffer); | ||
| 456 | } | ||
| 457 | if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { | ||
| 458 | const u32 base_offset = sampler_info->base_offset / 4; | ||
| 459 | index_var = GetCustomVariable(sampler_info->bindless_var); | ||
| 460 | info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); | ||
| 461 | |||
| 462 | // If this sampler has already been used, return the existing mapping. | ||
| 463 | const auto it = std::find_if( | ||
| 464 | used_samplers.begin(), used_samplers.end(), | ||
| 465 | [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); | ||
| 466 | if (it != used_samplers.end()) { | ||
| 467 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 468 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && | ||
| 469 | it->is_indexed); | ||
| 470 | return *it; | ||
| 471 | } | ||
| 472 | |||
| 473 | uses_indexed_samplers = true; | ||
| 474 | // Otherwise create a new mapping for this sampler | ||
| 475 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 476 | return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, | ||
| 477 | *info.is_shadow, *info.is_buffer, true); | ||
| 478 | } | ||
| 479 | return std::nullopt; | ||
| 480 | } | ||
| 481 | |||
| 482 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | ||
| 483 | u32 dest_elem = 0; | ||
| 484 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 485 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 486 | // Skip disabled components | ||
| 487 | continue; | ||
| 488 | } | ||
| 489 | SetTemporary(bb, dest_elem++, components[elem]); | ||
| 490 | } | ||
| 491 | // After writing the values to temporaries, move them to the real registers | ||
| 492 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 493 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 494 | } | ||
| 495 | } | ||
| 496 | |||
| 497 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, | ||
| 498 | bool ignore_mask) { | ||
| 499 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 500 | // go into gpr0+0 and gpr0+1, and the rest go into gpr28+0 and gpr28+1 | ||
| 501 | |||
| 502 | u32 dest_elem = 0; | ||
| 503 | for (u32 component = 0; component < 4; ++component) { | ||
| 504 | if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) | ||
| 505 | continue; | ||
| 506 | SetTemporary(bb, dest_elem++, components[component]); | ||
| 507 | } | ||
| 508 | |||
| 509 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 510 | if (i < 2) { | ||
| 511 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 512 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); | ||
| 513 | } else { | ||
| 514 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 515 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 516 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); | ||
| 517 | } | ||
| 518 | } | ||
| 519 | } | ||
| 520 | |||
| 521 | void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | ||
| 522 | const Node4& components, bool ignore_mask) { | ||
| 523 | // TEXS.F16 destination registers are packed in pairs into two registers (just like any other | ||
| 524 | // half-float instruction). | ||
| 525 | |||
| 526 | Node4 values; | ||
| 527 | u32 dest_elem = 0; | ||
| 528 | for (u32 component = 0; component < 4; ++component) { | ||
| 529 | if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) | ||
| 530 | continue; | ||
| 531 | values[dest_elem++] = components[component]; | ||
| 532 | } | ||
| 533 | if (dest_elem == 0) | ||
| 534 | return; | ||
| 535 | |||
| 536 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 537 | |||
| 538 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 539 | if (dest_elem <= 2) { | ||
| 540 | SetRegister(bb, instr.gpr0, first_value); | ||
| 541 | return; | ||
| 542 | } | ||
| 543 | |||
| 544 | SetTemporary(bb, 0, first_value); | ||
| 545 | SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 546 | |||
| 547 | SetRegister(bb, instr.gpr0, GetTemporary(0)); | ||
| 548 | SetRegister(bb, instr.gpr28, GetTemporary(1)); | ||
| 549 | } | ||
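// A minimal standalone sketch of the packing HPack2 performs above, assuming the first
// operand lands in the low half as with GLSL's packHalf2x16. The helper name PackHalf2
// is illustrative, not part of the codebase.
#include <cstdint>

constexpr uint32_t PackHalf2(uint16_t low_bits, uint16_t high_bits) {
    // Two binary16 bit patterns share one 32-bit register: low in [15:0], high in [31:16]
    return static_cast<uint32_t>(low_bits) | (static_cast<uint32_t>(high_bits) << 16);
}

static_assert(PackHalf2(0x3C00, 0x4000) == 0x40003C00); // 1.0h in low, 2.0h in high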
| 550 | |||
| 551 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||
| 552 | TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 553 | Node array, Node depth_compare, u32 bias_offset, | ||
| 554 | std::vector<Node> aoffi, | ||
| 555 | std::optional<Tegra::Shader::Register> bindless_reg) { | ||
| 556 | const bool is_array = array != nullptr; | ||
| 557 | const bool is_shadow = depth_compare != nullptr; | ||
| 558 | const bool is_bindless = bindless_reg.has_value(); | ||
| 559 | |||
| 560 | ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, | ||
| 561 | "Illegal texture type"); | ||
| 562 | |||
| 563 | SamplerInfo info; | ||
| 564 | info.type = texture_type; | ||
| 565 | info.is_array = is_array; | ||
| 566 | info.is_shadow = is_shadow; | ||
| 567 | info.is_buffer = false; | ||
| 568 | |||
| 569 | Node index_var; | ||
| 570 | const std::optional<SamplerEntry> sampler = | ||
| 571 | is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) | ||
| 572 | : GetSampler(instr.sampler, info); | ||
| 573 | if (!sampler) { | ||
| 574 | return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; | ||
| 575 | } | ||
| 576 | |||
| 577 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | ||
| 578 | process_mode == TextureProcessMode::LL || | ||
| 579 | process_mode == TextureProcessMode::LLA; | ||
| 580 | const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; | ||
| 581 | |||
| 582 | Node bias; | ||
| 583 | Node lod; | ||
| 584 | switch (process_mode) { | ||
| 585 | case TextureProcessMode::None: | ||
| 586 | break; | ||
| 587 | case TextureProcessMode::LZ: | ||
| 588 | lod = Immediate(0.0f); | ||
| 589 | break; | ||
| 590 | case TextureProcessMode::LB: | ||
| 591 | // If present, the LOD or bias is always stored in the register indexed by the gpr20 field, | ||
| 592 | // with an offset depending on the usage of the other registers. | ||
| 593 | bias = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 594 | break; | ||
| 595 | case TextureProcessMode::LL: | ||
| 596 | lod = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 597 | break; | ||
| 598 | default: | ||
| 599 | UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); | ||
| 600 | break; | ||
| 601 | } | ||
| 602 | |||
| 603 | Node4 values; | ||
| 604 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 605 | MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, | ||
| 606 | lod, {}, element, index_var}; | ||
| 607 | values[element] = Operation(opcode, meta, coords); | ||
| 608 | } | ||
| 609 | |||
| 610 | return values; | ||
| 611 | } | ||
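// Summary of the process-mode handling above, for reference:
//   None -> implicit LOD, no extra operand
//   LZ   -> explicit LOD forced to 0.0
//   LB   -> bias read from gpr20 (plus bias_offset)
//   LL   -> explicit LOD read from gpr20 (plus bias_offset)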
| 612 | |||
| 613 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||
| 614 | TextureProcessMode process_mode, bool depth_compare, bool is_array, | ||
| 615 | bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { | ||
| 616 | const bool lod_bias_enabled{ | ||
| 617 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; | ||
| 618 | |||
| 619 | const bool is_bindless = bindless_reg.has_value(); | ||
| 620 | |||
| 621 | u64 parameter_register = instr.gpr20.Value(); | ||
| 622 | if (is_bindless) { | ||
| 623 | ++parameter_register; | ||
| 624 | } | ||
| 625 | |||
| 626 | const u32 bias_lod_offset = (is_bindless ? 1 : 0); | ||
| 627 | if (lod_bias_enabled) { | ||
| 628 | ++parameter_register; | ||
| 629 | } | ||
| 630 | |||
| 631 | const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, | ||
| 632 | lod_bias_enabled, 4, 5); | ||
| 633 | const auto coord_count = std::get<0>(coord_counts); | ||
| 634 | // If arrays are enabled, the index is always stored in the gpr8 field | ||
| 635 | const u64 array_register = instr.gpr8.Value(); | ||
| 636 | // The first coordinate is in gpr8, or in gpr8 + 1 when arrays are used | ||
| 637 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 638 | |||
| 639 | std::vector<Node> coords; | ||
| 640 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 641 | coords.push_back(GetRegister(coord_register + i)); | ||
| 642 | } | ||
| 643 | // For 1D.DC in OpenGL, the 2nd component is ignored. | ||
| 644 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | ||
| 645 | coords.push_back(Immediate(0.0f)); | ||
| 646 | } | ||
| 647 | |||
| 648 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 649 | |||
| 650 | std::vector<Node> aoffi; | ||
| 651 | if (is_aoffi) { | ||
| 652 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); | ||
| 653 | } | ||
| 654 | |||
| 655 | Node dc; | ||
| 656 | if (depth_compare) { | ||
| 657 | // Depth is always stored in the register signaled by gpr20, or in the next register if LOD | ||
| 658 | // or bias is used | ||
| 659 | dc = GetRegister(parameter_register++); | ||
| 660 | } | ||
| 661 | |||
| 662 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, | ||
| 663 | aoffi, bindless_reg); | ||
| 664 | } | ||
| 665 | |||
| 666 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||
| 667 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 668 | const bool lod_bias_enabled = | ||
| 669 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 670 | |||
| 671 | const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, | ||
| 672 | lod_bias_enabled, 4, 4); | ||
| 673 | const auto coord_count = std::get<0>(coord_counts); | ||
| 674 | |||
| 675 | // If arrays are enabled, the index is always stored in the gpr8 field | ||
| 676 | const u64 array_register = instr.gpr8.Value(); | ||
| 677 | // The first coordinate is stored in the gpr8 field, or in (gpr8 + 1) when arrays are used | ||
| 678 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 679 | const u64 last_coord_register = | ||
| 680 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 681 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 682 | : coord_register + 1; | ||
| 683 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 684 | |||
| 685 | std::vector<Node> coords; | ||
| 686 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 687 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 688 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 689 | } | ||
| 690 | |||
| 691 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 692 | |||
| 693 | Node dc; | ||
| 694 | if (depth_compare) { | ||
| 695 | // Depth is always stored in the register signaled by gpr20, or in the next register if LOD | ||
| 696 | // or bias is used | ||
| 697 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 698 | dc = GetRegister(depth_register); | ||
| 699 | } | ||
| 700 | |||
| 701 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, | ||
| 702 | {}); | ||
| 703 | } | ||
| 704 | |||
| 705 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 706 | bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { | ||
| 707 | ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); | ||
| 708 | |||
| 709 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 710 | |||
| 711 | // If arrays are enabled, the index is always stored in the gpr8 field | ||
| 712 | const u64 array_register = instr.gpr8.Value(); | ||
| 713 | // The first coordinate is in gpr8, or in gpr8 + 1 when arrays are used | ||
| 714 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 715 | |||
| 716 | std::vector<Node> coords; | ||
| 717 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 718 | coords.push_back(GetRegister(coord_register + i)); | ||
| 719 | } | ||
| 720 | |||
| 721 | u64 parameter_register = instr.gpr20.Value(); | ||
| 722 | |||
| 723 | SamplerInfo info; | ||
| 724 | info.type = texture_type; | ||
| 725 | info.is_array = is_array; | ||
| 726 | info.is_shadow = depth_compare; | ||
| 727 | |||
| 728 | Node index_var; | ||
| 729 | const std::optional<SamplerEntry> sampler = | ||
| 730 | is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) | ||
| 731 | : GetSampler(instr.sampler, info); | ||
| 732 | Node4 values; | ||
| 733 | if (!sampler) { | ||
| 734 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 735 | values[element] = Immediate(0); | ||
| 736 | } | ||
| 737 | return values; | ||
| 738 | } | ||
| 739 | |||
| 740 | std::vector<Node> aoffi, ptp; | ||
| 741 | if (is_aoffi) { | ||
| 742 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); | ||
| 743 | } else if (is_ptp) { | ||
| 744 | ptp = GetPtpCoordinates( | ||
| 745 | {GetRegister(parameter_register++), GetRegister(parameter_register++)}); | ||
| 746 | } | ||
| 747 | |||
| 748 | Node dc; | ||
| 749 | if (depth_compare) { | ||
| 750 | dc = GetRegister(parameter_register++); | ||
| 751 | } | ||
| 752 | |||
| 753 | const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) | ||
| 754 | : Immediate(static_cast<u32>(instr.tld4.component)); | ||
| 755 | |||
| 756 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 757 | auto coords_copy = coords; | ||
| 758 | MetaTexture meta{ | ||
| 759 | *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, | ||
| 760 | index_var}; | ||
| 761 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 762 | } | ||
| 763 | |||
| 764 | return values; | ||
| 765 | } | ||
| 766 | |||
| 767 | Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | ||
| 768 | const auto texture_type{instr.tld.texture_type}; | ||
| 769 | const bool is_array{instr.tld.is_array != 0}; | ||
| 770 | const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; | ||
| 771 | const std::size_t coord_count{GetCoordCount(texture_type)}; | ||
| 772 | |||
| 773 | u64 gpr8_cursor{instr.gpr8.Value()}; | ||
| 774 | const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; | ||
| 775 | |||
| 776 | std::vector<Node> coords; | ||
| 777 | coords.reserve(coord_count); | ||
| 778 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 779 | coords.push_back(GetRegister(gpr8_cursor++)); | ||
| 780 | } | ||
| 781 | |||
| 782 | u64 gpr20_cursor{instr.gpr20.Value()}; | ||
| 783 | // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 784 | const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; | ||
| 785 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 786 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 787 | |||
| 788 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); | ||
| 789 | |||
| 790 | Node4 values; | ||
| 791 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 792 | auto coords_copy = coords; | ||
| 793 | MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; | ||
| 794 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 795 | } | ||
| 796 | |||
| 797 | return values; | ||
| 798 | } | ||
| 799 | |||
| 800 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 801 | SamplerInfo info; | ||
| 802 | info.type = texture_type; | ||
| 803 | info.is_array = is_array; | ||
| 804 | info.is_shadow = false; | ||
| 805 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); | ||
| 806 | |||
| 807 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 808 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 809 | const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 810 | |||
| 811 | // If arrays are enabled, the index is always stored in the gpr8 field | ||
| 812 | const u64 array_register = instr.gpr8.Value(); | ||
| 813 | // For arrays, the coordinates are stored in gpr20 | ||
| 814 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 815 | |||
| 816 | const u64 last_coord_register = | ||
| 817 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 818 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 819 | : coord_register + 1; | ||
| 820 | |||
| 821 | std::vector<Node> coords; | ||
| 822 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 823 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 824 | coords.push_back( | ||
| 825 | GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); | ||
| 826 | } | ||
| 827 | |||
| 828 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 829 | // When LOD is used, it is always in gpr20 | ||
| 830 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | ||
| 831 | |||
| 832 | std::vector<Node> aoffi; | ||
| 833 | if (aoffi_enabled) { | ||
| 834 | aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); | ||
| 835 | } | ||
| 836 | |||
| 837 | Node4 values; | ||
| 838 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 839 | auto coords_copy = coords; | ||
| 840 | MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; | ||
| 841 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 842 | } | ||
| 843 | return values; | ||
| 844 | } | ||
| 845 | |||
| 846 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 847 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 848 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 849 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 850 | |||
| 851 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 852 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 853 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 854 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 855 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 856 | } | ||
| 857 | // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later. | ||
| 858 | total_coord_count += | ||
| 859 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 860 | |||
| 861 | return {coord_count, total_coord_count}; | ||
| 862 | } | ||
| 863 | |||
| 864 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, | ||
| 865 | bool is_tld4) { | ||
| 866 | const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; | ||
| 867 | const u32 size = is_tld4 ? 6 : 4; | ||
| 868 | const s32 wrap_value = is_tld4 ? 32 : 8; | ||
| 869 | const s32 diff_value = is_tld4 ? 64 : 16; | ||
| 870 | const u32 mask = (1U << size) - 1; | ||
| 871 | |||
| 872 | std::vector<Node> aoffi; | ||
| 873 | aoffi.reserve(coord_count); | ||
| 874 | |||
| 875 | const auto aoffi_immediate{ | ||
| 876 | TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; | ||
| 877 | if (!aoffi_immediate) { | ||
| 878 | // Variable access, not supported on AMD. | ||
| 879 | LOG_WARNING(HW_GPU, | ||
| 880 | "AOFFI constant folding failed, some hardware might have graphical issues"); | ||
| 881 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 882 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); | ||
| 883 | const Node condition = | ||
| 884 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); | ||
| 885 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); | ||
| 886 | aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 887 | } | ||
| 888 | return aoffi; | ||
| 889 | } | ||
| 890 | |||
| 891 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 892 | s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; | ||
| 893 | if (value >= wrap_value) { | ||
| 894 | value -= diff_value; | ||
| 895 | } | ||
| 896 | aoffi.push_back(Immediate(value)); | ||
| 897 | } | ||
| 898 | return aoffi; | ||
| 899 | } | ||
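// Standalone sketch of the immediate decode path above: each texture offset is a
// two's-complement bitfield, 4 bits wide for regular AOFFI and 6 bits wide for TLD4.
// DecodeAoffi is an illustrative name, not a codebase function.
#include <array>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int32_t> DecodeAoffi(uint32_t imm, std::size_t coord_count, bool is_tld4) {
    const std::array<uint32_t, 3> shifts =
        is_tld4 ? std::array<uint32_t, 3>{0, 8, 16} : std::array<uint32_t, 3>{0, 4, 8};
    const uint32_t bits = is_tld4 ? 6 : 4;
    const int32_t wrap = is_tld4 ? 32 : 8;  // Smallest field value that is negative
    const int32_t diff = is_tld4 ? 64 : 16; // 2^bits, subtracted to sign-extend
    const uint32_t mask = (1U << bits) - 1;

    std::vector<int32_t> offsets;
    offsets.reserve(coord_count);
    for (std::size_t coord = 0; coord < coord_count; ++coord) {
        int32_t value = static_cast<int32_t>((imm >> shifts[coord]) & mask);
        if (value >= wrap) {
            value -= diff; // e.g. a 4-bit 0b1111 wraps from 15 to -1
        }
        offsets.push_back(value);
    }
    return offsets;
}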
| 900 | |||
| 901 | std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { | ||
| 902 | static constexpr u32 num_entries = 8; | ||
| 903 | |||
| 904 | std::vector<Node> ptp; | ||
| 905 | ptp.reserve(num_entries); | ||
| 906 | |||
| 907 | const auto global_size = static_cast<s64>(global_code.size()); | ||
| 908 | const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); | ||
| 909 | const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); | ||
| 910 | if (!low || !high) { | ||
| 911 | for (u32 entry = 0; entry < num_entries; ++entry) { | ||
| 912 | const u32 reg = entry / 4; | ||
| 913 | const u32 offset = entry % 4; | ||
| 914 | const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); | ||
| 915 | const Node condition = | ||
| 916 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); | ||
| 917 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); | ||
| 918 | ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 919 | } | ||
| 920 | return ptp; | ||
| 921 | } | ||
| 922 | |||
| 923 | const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); | ||
| 924 | for (u32 entry = 0; entry < num_entries; ++entry) { | ||
| 925 | s32 value = (immediate >> (entry * 8)) & 0b111111; | ||
| 926 | if (value >= 32) { | ||
| 927 | value -= 64; | ||
| 928 | } | ||
| 929 | ptp.push_back(Immediate(value)); | ||
| 930 | } | ||
| 931 | |||
| 932 | return ptp; | ||
| 933 | } | ||
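// Same idea for PTP: the two source registers form a 64-bit immediate holding eight
// 6-bit two's-complement offsets, one per byte. DecodePtp is an illustrative name.
#include <array>
#include <cstdint>

std::array<int32_t, 8> DecodePtp(uint32_t low, uint32_t high) {
    const uint64_t imm = (static_cast<uint64_t>(high) << 32) | low;
    std::array<int32_t, 8> offsets{};
    for (uint32_t entry = 0; entry < 8; ++entry) {
        int32_t value = static_cast<int32_t>((imm >> (entry * 8)) & 0b111111);
        if (value >= 32) {
            value -= 64; // Sign-extend the 6-bit field
        }
        offsets[entry] = value;
    }
    return offsets;
}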
| 934 | |||
| 935 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null | |||
| @@ -1,169 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using std::move; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::VideoType; | ||
| 18 | using Tegra::Shader::VmadShr; | ||
| 19 | using Tegra::Shader::VmnmxOperation; | ||
| 20 | using Tegra::Shader::VmnmxType; | ||
| 21 | |||
| 22 | u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { | ||
| 23 | const Instruction instr = {program_code[pc]}; | ||
| 24 | const auto opcode = OpCode::Decode(instr); | ||
| 25 | |||
| 26 | if (opcode->get().GetId() == OpCode::Id::VMNMX) { | ||
| 27 | DecodeVMNMX(bb, instr); | ||
| 28 | return pc; | ||
| 29 | } | ||
| 30 | |||
| 31 | const Node op_a = | ||
| 32 | GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, | ||
| 33 | instr.video.type_a, instr.video.byte_height_a); | ||
| 34 | const Node op_b = [this, instr] { | ||
| 35 | if (instr.video.use_register_b) { | ||
| 36 | return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, | ||
| 37 | instr.video.signed_b, instr.video.type_b, | ||
| 38 | instr.video.byte_height_b); | ||
| 39 | } | ||
| 40 | if (instr.video.signed_b) { | ||
| 41 | const auto imm = static_cast<s16>(instr.alu.GetImm20_16()); | ||
| 42 | return Immediate(static_cast<u32>(imm)); | ||
| 43 | } else { | ||
| 44 | return Immediate(instr.alu.GetImm20_16()); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | |||
| 48 | switch (opcode->get().GetId()) { | ||
| 49 | case OpCode::Id::VMAD: { | ||
| 50 | const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 51 | const Node op_c = GetRegister(instr.gpr39); | ||
| 52 | |||
| 53 | Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); | ||
| 54 | value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); | ||
| 55 | |||
| 56 | if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { | ||
| 57 | const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); | ||
| 58 | value = | ||
| 59 | SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); | ||
| 60 | } | ||
| 61 | |||
| 62 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 63 | SetRegister(bb, instr.gpr0, value); | ||
| 64 | break; | ||
| 65 | } | ||
| 66 | case OpCode::Id::VSETP: { | ||
| 67 | // We can't use the constant predicate as destination. | ||
| 68 | ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 69 | |||
| 70 | const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 71 | const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); | ||
| 72 | const Node second_pred = GetPredicate(instr.vsetp.pred39, false); | ||
| 73 | |||
| 74 | const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); | ||
| 75 | |||
| 76 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 77 | SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); | ||
| 78 | |||
| 79 | if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 80 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 81 | // if enabled | ||
| 82 | const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); | ||
| 83 | SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); | ||
| 84 | } | ||
| 85 | break; | ||
| 86 | } | ||
| 87 | default: | ||
| 88 | UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); | ||
| 89 | } | ||
| 90 | |||
| 91 | return pc; | ||
| 92 | } | ||
| 93 | |||
| 94 | Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, | ||
| 95 | u64 byte_height) { | ||
| 96 | if (!is_chunk) { | ||
| 97 | return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); | ||
| 98 | } | ||
| 99 | |||
| 100 | switch (type) { | ||
| 101 | case VideoType::Size16_Low: | ||
| 102 | return BitfieldExtract(op, 0, 16); | ||
| 103 | case VideoType::Size16_High: | ||
| 104 | return BitfieldExtract(op, 16, 16); | ||
| 105 | case VideoType::Size32: | ||
| 106 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used | ||
| 107 | // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. | ||
| 108 | UNIMPLEMENTED(); | ||
| 109 | return Immediate(0); | ||
| 110 | case VideoType::Invalid: | ||
| 111 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 112 | return Immediate(0); | ||
| 113 | default: | ||
| 114 | UNREACHABLE(); | ||
| 115 | return Immediate(0); | ||
| 116 | } | ||
| 117 | } | ||
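// Standalone sketch of the unsigned extraction above: a video operand is either one
// byte lane of the source register (byte_height selects which) or one of its 16-bit
// halves. Helper names are illustrative.
#include <cstdint>

uint32_t ExtractByteLane(uint32_t reg, uint32_t byte_height) {
    return (reg >> (byte_height * 8)) & 0xFF;
}

uint32_t ExtractHalf(uint32_t reg, bool high) {
    return (reg >> (high ? 16 : 0)) & 0xFFFF;
}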
| 118 | |||
| 119 | void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { | ||
| 120 | UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); | ||
| 121 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); | ||
| 122 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); | ||
| 123 | UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); | ||
| 124 | UNIMPLEMENTED_IF(instr.vmnmx.sat); | ||
| 125 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 126 | |||
| 127 | Node op_a = GetRegister(instr.gpr8); | ||
| 128 | Node op_b = GetRegister(instr.gpr20); | ||
| 129 | Node op_c = GetRegister(instr.gpr39); | ||
| 130 | |||
| 131 | const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed | ||
| 132 | const bool is_oper2_signed = instr.vmnmx.is_dest_signed; | ||
| 133 | |||
| 134 | const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; | ||
| 135 | Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); | ||
| 136 | |||
| 137 | switch (instr.vmnmx.operation) { | ||
| 138 | case VmnmxOperation::Mrg_16H: | ||
| 139 | value = BitfieldInsert(move(op_c), move(value), 16, 16); | ||
| 140 | break; | ||
| 141 | case VmnmxOperation::Mrg_16L: | ||
| 142 | value = BitfieldInsert(move(op_c), move(value), 0, 16); | ||
| 143 | break; | ||
| 144 | case VmnmxOperation::Mrg_8B0: | ||
| 145 | value = BitfieldInsert(move(op_c), move(value), 0, 8); | ||
| 146 | break; | ||
| 147 | case VmnmxOperation::Mrg_8B2: | ||
| 148 | value = BitfieldInsert(move(op_c), move(value), 16, 8); | ||
| 149 | break; | ||
| 150 | case VmnmxOperation::Acc: | ||
| 151 | value = Operation(OperationCode::IAdd, move(value), move(op_c)); | ||
| 152 | break; | ||
| 153 | case VmnmxOperation::Min: | ||
| 154 | value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); | ||
| 155 | break; | ||
| 156 | case VmnmxOperation::Max: | ||
| 157 | value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); | ||
| 158 | break; | ||
| 159 | case VmnmxOperation::Nop: | ||
| 160 | break; | ||
| 161 | default: | ||
| 162 | UNREACHABLE(); | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | |||
| 166 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 167 | } | ||
| 168 | |||
| 169 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null | |||
| @@ -1,117 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::ShuffleOperation; | ||
| 17 | using Tegra::Shader::VoteOperation; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | OperationCode GetOperationCode(VoteOperation vote_op) { | ||
| 22 | switch (vote_op) { | ||
| 23 | case VoteOperation::All: | ||
| 24 | return OperationCode::VoteAll; | ||
| 25 | case VoteOperation::Any: | ||
| 26 | return OperationCode::VoteAny; | ||
| 27 | case VoteOperation::Eq: | ||
| 28 | return OperationCode::VoteEqual; | ||
| 29 | default: | ||
| 30 | UNREACHABLE_MSG("Invalid vote operation={}", vote_op); | ||
| 31 | return OperationCode::VoteAll; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | } // Anonymous namespace | ||
| 36 | |||
| 37 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | ||
| 38 | const Instruction instr = {program_code[pc]}; | ||
| 39 | const auto opcode = OpCode::Decode(instr); | ||
| 40 | |||
| 41 | // Signal the backend that this shader uses warp instructions. | ||
| 42 | uses_warps = true; | ||
| 43 | |||
| 44 | switch (opcode->get().GetId()) { | ||
| 45 | case OpCode::Id::VOTE: { | ||
| 46 | const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); | ||
| 47 | const Node active = Operation(OperationCode::BallotThread, value); | ||
| 48 | const Node vote = Operation(GetOperationCode(instr.vote.operation), value); | ||
| 49 | SetRegister(bb, instr.gpr0, active); | ||
| 50 | SetPredicate(bb, instr.vote.dest_pred, vote); | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | case OpCode::Id::SHFL: { | ||
| 54 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | ||
| 55 | : GetRegister(instr.gpr39); | ||
| 56 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) | ||
| 57 | : GetRegister(instr.gpr20); | ||
| 58 | |||
| 59 | Node thread_id = Operation(OperationCode::ThreadId); | ||
| 60 | Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); | ||
| 61 | Node seg_mask = BitfieldExtract(mask, 8, 16); | ||
| 62 | |||
| 63 | Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); | ||
| 64 | Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); | ||
| 65 | Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, | ||
| 66 | Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); | ||
| 67 | |||
| 68 | Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { | ||
| 69 | switch (instr.shfl.operation) { | ||
| 70 | case ShuffleOperation::Idx: | ||
| 71 | return Operation(OperationCode::IBitwiseOr, | ||
| 72 | Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), | ||
| 73 | min_thread_id); | ||
| 74 | case ShuffleOperation::Down: | ||
| 75 | return Operation(OperationCode::IAdd, thread_id, index); | ||
| 76 | case ShuffleOperation::Up: | ||
| 77 | return Operation(OperationCode::IAdd, thread_id, | ||
| 78 | Operation(OperationCode::INegate, index)); | ||
| 79 | case ShuffleOperation::Bfly: | ||
| 80 | return Operation(OperationCode::IBitwiseXor, thread_id, index); | ||
| 81 | } | ||
| 82 | UNREACHABLE(); | ||
| 83 | return Immediate(0U); | ||
| 84 | }(); | ||
| 85 | |||
| 86 | Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { | ||
| 87 | if (instr.shfl.operation == ShuffleOperation::Up) { | ||
| 88 | return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); | ||
| 89 | } else { | ||
| 90 | return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); | ||
| 91 | } | ||
| 92 | }(); | ||
| 93 | |||
| 94 | SetPredicate(bb, instr.shfl.pred48, in_bounds); | ||
| 95 | SetRegister( | ||
| 96 | bb, instr.gpr0, | ||
| 97 | Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | case OpCode::Id::FSWZADD: { | ||
| 101 | UNIMPLEMENTED_IF(instr.fswzadd.ndv); | ||
| 102 | |||
| 103 | Node op_a = GetRegister(instr.gpr8); | ||
| 104 | Node op_b = GetRegister(instr.gpr20); | ||
| 105 | Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); | ||
| 106 | SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); | ||
| 107 | break; | ||
| 108 | } | ||
| 109 | default: | ||
| 110 | UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); | ||
| 111 | break; | ||
| 112 | } | ||
| 113 | |||
| 114 | return pc; | ||
| 115 | } | ||
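// Standalone sketch of the SHFL lane arithmetic above, assuming 32-thread warps: the
// mask packs a 5-bit clamp in bits [4:0] and a segment mask in bits [23:8]. ShflBounds
// and ComputeShflBounds are illustrative names.
#include <cstdint>

struct ShflBounds {
    uint32_t min_lane; // First lane of this thread's segment
    uint32_t max_lane; // Last lane the shuffle may read from
};

ShflBounds ComputeShflBounds(uint32_t thread_id, uint32_t mask) {
    const uint32_t clamp = mask & 0x1F;
    const uint32_t seg_mask = (mask >> 8) & 0xFFFF;
    const uint32_t min_lane = thread_id & seg_mask;
    const uint32_t max_lane = min_lane | (clamp & ~seg_mask);
    return {min_lane, max_lane};
}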
| 116 | |||
| 117 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null | |||
| @@ -1,156 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::PredCondition; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | UNIMPLEMENTED_IF(instr.xmad.sign_a); | ||
| 22 | UNIMPLEMENTED_IF(instr.xmad.sign_b); | ||
| 23 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 24 | "Condition codes generation in XMAD is not implemented"); | ||
| 25 | |||
| 26 | Node op_a = GetRegister(instr.gpr8); | ||
| 27 | |||
| 28 | // TODO(bunnei): Needs to be fixed once op_a or op_b is signed | ||
| 29 | UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); | ||
| 30 | const bool is_signed_a = instr.xmad.sign_a == 1; | ||
| 31 | const bool is_signed_b = instr.xmad.sign_b == 1; | ||
| 32 | const bool is_signed_c = is_signed_a; | ||
| 33 | |||
| 34 | auto [is_merge, is_psl, is_high_b, mode, op_b_binding, | ||
| 35 | op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { | ||
| 36 | switch (opcode->get().GetId()) { | ||
| 37 | case OpCode::Id::XMAD_CR: | ||
| 38 | return {instr.xmad.merge_56, | ||
| 39 | instr.xmad.product_shift_left_second, | ||
| 40 | instr.xmad.high_b, | ||
| 41 | instr.xmad.mode_cbf, | ||
| 42 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 43 | GetRegister(instr.gpr39)}; | ||
| 44 | case OpCode::Id::XMAD_RR: | ||
| 45 | return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, | ||
| 46 | instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 47 | case OpCode::Id::XMAD_RC: | ||
| 48 | return {false, | ||
| 49 | false, | ||
| 50 | instr.xmad.high_b, | ||
| 51 | instr.xmad.mode_cbf, | ||
| 52 | GetRegister(instr.gpr39), | ||
| 53 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 54 | case OpCode::Id::XMAD_IMM: | ||
| 55 | return {instr.xmad.merge_37, | ||
| 56 | instr.xmad.product_shift_left, | ||
| 57 | false, | ||
| 58 | instr.xmad.mode, | ||
| 59 | Immediate(static_cast<u32>(instr.xmad.imm20_16)), | ||
| 60 | GetRegister(instr.gpr39)}; | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); | ||
| 63 | return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; | ||
| 64 | } | ||
| 65 | }(); | ||
| 66 | |||
| 67 | op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), | ||
| 68 | instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 69 | |||
| 70 | const Node original_b = op_b_binding; | ||
| 71 | const Node op_b = | ||
| 72 | SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), | ||
| 73 | is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 74 | |||
| 75 | // sign_a and sign_b were already checked for equality above, so just use one of them here. | ||
| 76 | Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); | ||
| 77 | if (is_psl) { | ||
| 78 | product = | ||
| 79 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); | ||
| 80 | } | ||
| 81 | SetTemporary(bb, 0, product); | ||
| 82 | product = GetTemporary(0); | ||
| 83 | |||
| 84 | Node original_c = op_c; | ||
| 85 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error | ||
| 86 | op_c = [&] { | ||
| 87 | switch (set_mode) { | ||
| 88 | case Tegra::Shader::XmadMode::None: | ||
| 89 | return original_c; | ||
| 90 | case Tegra::Shader::XmadMode::CLo: | ||
| 91 | return BitfieldExtract(std::move(original_c), 0, 16); | ||
| 92 | case Tegra::Shader::XmadMode::CHi: | ||
| 93 | return BitfieldExtract(std::move(original_c), 16, 16); | ||
| 94 | case Tegra::Shader::XmadMode::CBcc: { | ||
| 95 | Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, | ||
| 96 | original_b, Immediate(16)); | ||
| 97 | return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), | ||
| 98 | std::move(shifted_b)); | ||
| 99 | } | ||
| 100 | case Tegra::Shader::XmadMode::CSfu: { | ||
| 101 | const Node comp_a = | ||
| 102 | GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); | ||
| 103 | const Node comp_b = | ||
| 104 | GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); | ||
| 105 | const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); | ||
| 106 | |||
| 107 | const Node comp_minus_a = GetPredicateComparisonInteger( | ||
| 108 | PredCondition::NE, is_signed_a, | ||
| 109 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, | ||
| 110 | Immediate(0x80000000)), | ||
| 111 | Immediate(0)); | ||
| 112 | const Node comp_minus_b = GetPredicateComparisonInteger( | ||
| 113 | PredCondition::NE, is_signed_b, | ||
| 114 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, | ||
| 115 | Immediate(0x80000000)), | ||
| 116 | Immediate(0)); | ||
| 117 | |||
| 118 | Node new_c = Operation( | ||
| 119 | OperationCode::Select, comp_minus_a, | ||
| 120 | SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), | ||
| 121 | original_c); | ||
| 122 | new_c = Operation( | ||
| 123 | OperationCode::Select, comp_minus_b, | ||
| 124 | SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), | ||
| 125 | std::move(new_c)); | ||
| 126 | |||
| 127 | return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); | ||
| 128 | } | ||
| 129 | default: | ||
| 130 | UNREACHABLE(); | ||
| 131 | return Immediate(0); | ||
| 132 | } | ||
| 133 | }(); | ||
| 134 | |||
| 135 | SetTemporary(bb, 1, op_c); | ||
| 136 | op_c = GetTemporary(1); | ||
| 137 | |||
| 138 | // TODO(Rodrigo): Use an appropriate sign for this operation | ||
| 139 | Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); | ||
| 140 | SetTemporary(bb, 2, sum); | ||
| 141 | sum = GetTemporary(2); | ||
| 142 | if (is_merge) { | ||
| 143 | const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), | ||
| 144 | Immediate(0), Immediate(16)); | ||
| 145 | const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, | ||
| 146 | Immediate(16)); | ||
| 147 | sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); | ||
| 148 | } | ||
| 149 | |||
| 150 | SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); | ||
| 151 | SetRegister(bb, instr.gpr0, std::move(sum)); | ||
| 152 | |||
| 153 | return pc; | ||
| 154 | } | ||
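// Reference semantics for the XMAD core in the unsigned case, ignoring the merge and
// CSfu modes: multiply two selected 16-bit halves, optionally shift the product left
// by 16 (PSL), then add the third operand. XmadCore is an illustrative name.
#include <cstdint>

uint32_t XmadCore(uint32_t a, uint32_t b, uint32_t c, bool high_a, bool high_b, bool psl) {
    const uint32_t a16 = (high_a ? a >> 16 : a) & 0xFFFF;
    const uint32_t b16 = (high_b ? b >> 16 : b) & 0xFFFF;
    uint32_t product = a16 * b16;
    if (psl) {
        product <<= 16;
    }
    return product + c; // Wrapping 32-bit add, matching IAdd
}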
| 155 | |||
| 156 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp deleted file mode 100644 index 2647865d4..000000000 --- a/src/video_core/shader/expr.cpp +++ /dev/null | |||
| @@ -1,93 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <variant> | ||
| 7 | |||
| 8 | #include "video_core/shader/expr.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | namespace { | ||
| 12 | bool ExprIsBoolean(const Expr& expr) { | ||
| 13 | return std::holds_alternative<ExprBoolean>(*expr); | ||
| 14 | } | ||
| 15 | |||
| 16 | bool ExprBooleanGet(const Expr& expr) { | ||
| 17 | return std::get_if<ExprBoolean>(expr.get())->value; | ||
| 18 | } | ||
| 19 | } // Anonymous namespace | ||
| 20 | |||
| 21 | bool ExprAnd::operator==(const ExprAnd& b) const { | ||
| 22 | return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); | ||
| 23 | } | ||
| 24 | |||
| 25 | bool ExprAnd::operator!=(const ExprAnd& b) const { | ||
| 26 | return !operator==(b); | ||
| 27 | } | ||
| 28 | |||
| 29 | bool ExprOr::operator==(const ExprOr& b) const { | ||
| 30 | return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool ExprOr::operator!=(const ExprOr& b) const { | ||
| 34 | return !operator==(b); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool ExprNot::operator==(const ExprNot& b) const { | ||
| 38 | return *operand1 == *b.operand1; | ||
| 39 | } | ||
| 40 | |||
| 41 | bool ExprNot::operator!=(const ExprNot& b) const { | ||
| 42 | return !operator==(b); | ||
| 43 | } | ||
| 44 | |||
| 45 | Expr MakeExprNot(Expr first) { | ||
| 46 | if (std::holds_alternative<ExprNot>(*first)) { | ||
| 47 | return std::get_if<ExprNot>(first.get())->operand1; | ||
| 48 | } | ||
| 49 | return MakeExpr<ExprNot>(std::move(first)); | ||
| 50 | } | ||
| 51 | |||
| 52 | Expr MakeExprAnd(Expr first, Expr second) { | ||
| 53 | if (ExprIsBoolean(first)) { | ||
| 54 | return ExprBooleanGet(first) ? second : first; | ||
| 55 | } | ||
| 56 | if (ExprIsBoolean(second)) { | ||
| 57 | return ExprBooleanGet(second) ? first : second; | ||
| 58 | } | ||
| 59 | return MakeExpr<ExprAnd>(std::move(first), std::move(second)); | ||
| 60 | } | ||
| 61 | |||
| 62 | Expr MakeExprOr(Expr first, Expr second) { | ||
| 63 | if (ExprIsBoolean(first)) { | ||
| 64 | return ExprBooleanGet(first) ? first : second; | ||
| 65 | } | ||
| 66 | if (ExprIsBoolean(second)) { | ||
| 67 | return ExprBooleanGet(second) ? second : first; | ||
| 68 | } | ||
| 69 | return MakeExpr<ExprOr>(std::move(first), std::move(second)); | ||
| 70 | } | ||
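// Usage sketch of the constant folding above, using the helpers declared in expr.h
// below; a boolean operand short-circuits instead of allocating a new node:
//   const Expr t = MakeExpr<ExprBoolean>(true);
//   const Expr p = MakeExpr<ExprPredicate>(3);
//   MakeExprAnd(t, p);           // yields p: true AND x == x
//   MakeExprOr(t, p);            // yields t: true OR x == true
//   MakeExprNot(MakeExprNot(p)); // yields p: double negation folds away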
| 71 | |||
| 72 | bool ExprAreEqual(const Expr& first, const Expr& second) { | ||
| 73 | return (*first) == (*second); | ||
| 74 | } | ||
| 75 | |||
| 76 | bool ExprAreOpposite(const Expr& first, const Expr& second) { | ||
| 77 | if (std::holds_alternative<ExprNot>(*first)) { | ||
| 78 | return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second); | ||
| 79 | } | ||
| 80 | if (std::holds_alternative<ExprNot>(*second)) { | ||
| 81 | return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first); | ||
| 82 | } | ||
| 83 | return false; | ||
| 84 | } | ||
| 85 | |||
| 86 | bool ExprIsTrue(const Expr& first) { | ||
| 87 | if (ExprIsBoolean(first)) { | ||
| 88 | return ExprBooleanGet(first); | ||
| 89 | } | ||
| 90 | return false; | ||
| 91 | } | ||
| 92 | |||
| 93 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h deleted file mode 100644 index cda284c72..000000000 --- a/src/video_core/shader/expr.h +++ /dev/null | |||
| @@ -1,156 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <variant> | ||
| 9 | |||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::ConditionCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | class ExprAnd; | ||
| 18 | class ExprBoolean; | ||
| 19 | class ExprCondCode; | ||
| 20 | class ExprGprEqual; | ||
| 21 | class ExprNot; | ||
| 22 | class ExprOr; | ||
| 23 | class ExprPredicate; | ||
| 24 | class ExprVar; | ||
| 25 | |||
| 26 | using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, | ||
| 27 | ExprBoolean, ExprGprEqual>; | ||
| 28 | using Expr = std::shared_ptr<ExprData>; | ||
| 29 | |||
| 30 | class ExprAnd final { | ||
| 31 | public: | ||
| 32 | explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} | ||
| 33 | |||
| 34 | bool operator==(const ExprAnd& b) const; | ||
| 35 | bool operator!=(const ExprAnd& b) const; | ||
| 36 | |||
| 37 | Expr operand1; | ||
| 38 | Expr operand2; | ||
| 39 | }; | ||
| 40 | |||
| 41 | class ExprOr final { | ||
| 42 | public: | ||
| 43 | explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} | ||
| 44 | |||
| 45 | bool operator==(const ExprOr& b) const; | ||
| 46 | bool operator!=(const ExprOr& b) const; | ||
| 47 | |||
| 48 | Expr operand1; | ||
| 49 | Expr operand2; | ||
| 50 | }; | ||
| 51 | |||
| 52 | class ExprNot final { | ||
| 53 | public: | ||
| 54 | explicit ExprNot(Expr a) : operand1{std::move(a)} {} | ||
| 55 | |||
| 56 | bool operator==(const ExprNot& b) const; | ||
| 57 | bool operator!=(const ExprNot& b) const; | ||
| 58 | |||
| 59 | Expr operand1; | ||
| 60 | }; | ||
| 61 | |||
| 62 | class ExprVar final { | ||
| 63 | public: | ||
| 64 | explicit ExprVar(u32 index) : var_index{index} {} | ||
| 65 | |||
| 66 | bool operator==(const ExprVar& b) const { | ||
| 67 | return var_index == b.var_index; | ||
| 68 | } | ||
| 69 | |||
| 70 | bool operator!=(const ExprVar& b) const { | ||
| 71 | return !operator==(b); | ||
| 72 | } | ||
| 73 | |||
| 74 | u32 var_index; | ||
| 75 | }; | ||
| 76 | |||
| 77 | class ExprPredicate final { | ||
| 78 | public: | ||
| 79 | explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} | ||
| 80 | |||
| 81 | bool operator==(const ExprPredicate& b) const { | ||
| 82 | return predicate == b.predicate; | ||
| 83 | } | ||
| 84 | |||
| 85 | bool operator!=(const ExprPredicate& b) const { | ||
| 86 | return !operator==(b); | ||
| 87 | } | ||
| 88 | |||
| 89 | u32 predicate; | ||
| 90 | }; | ||
| 91 | |||
| 92 | class ExprCondCode final { | ||
| 93 | public: | ||
| 94 | explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} | ||
| 95 | |||
| 96 | bool operator==(const ExprCondCode& b) const { | ||
| 97 | return cc == b.cc; | ||
| 98 | } | ||
| 99 | |||
| 100 | bool operator!=(const ExprCondCode& b) const { | ||
| 101 | return !operator==(b); | ||
| 102 | } | ||
| 103 | |||
| 104 | ConditionCode cc; | ||
| 105 | }; | ||
| 106 | |||
| 107 | class ExprBoolean final { | ||
| 108 | public: | ||
| 109 | explicit ExprBoolean(bool val) : value{val} {} | ||
| 110 | |||
| 111 | bool operator==(const ExprBoolean& b) const { | ||
| 112 | return value == b.value; | ||
| 113 | } | ||
| 114 | |||
| 115 | bool operator!=(const ExprBoolean& b) const { | ||
| 116 | return !operator==(b); | ||
| 117 | } | ||
| 118 | |||
| 119 | bool value; | ||
| 120 | }; | ||
| 121 | |||
| 122 | class ExprGprEqual final { | ||
| 123 | public: | ||
| 124 | explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} | ||
| 125 | |||
| 126 | bool operator==(const ExprGprEqual& b) const { | ||
| 127 | return gpr == b.gpr && value == b.value; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool operator!=(const ExprGprEqual& b) const { | ||
| 131 | return !operator==(b); | ||
| 132 | } | ||
| 133 | |||
| 134 | u32 gpr; | ||
| 135 | u32 value; | ||
| 136 | }; | ||
| 137 | |||
| 138 | template <typename T, typename... Args> | ||
| 139 | Expr MakeExpr(Args&&... args) { | ||
| 140 | static_assert(std::is_convertible_v<T, ExprData>); | ||
| 141 | return std::make_shared<ExprData>(T(std::forward<Args>(args)...)); | ||
| 142 | } | ||
| 143 | |||
| 144 | bool ExprAreEqual(const Expr& first, const Expr& second); | ||
| 145 | |||
| 146 | bool ExprAreOpposite(const Expr& first, const Expr& second); | ||
| 147 | |||
| 148 | Expr MakeExprNot(Expr first); | ||
| 149 | |||
| 150 | Expr MakeExprAnd(Expr first, Expr second); | ||
| 151 | |||
| 152 | Expr MakeExprOr(Expr first, Expr second); | ||
| 153 | |||
| 154 | bool ExprIsTrue(const Expr& first); | ||
| 155 | |||
| 156 | } // namespace VideoCommon::Shader | ||
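
The factory helpers that close out expr.h were how the control-flow pass built its boolean expression trees. A minimal sketch of their intended use, assuming the Expr/ExprData definitions from the top of expr.h (which fall outside this hunk); the function name is hypothetical:

    // Sketch only: Expr, ExprData and the Expr* node types are declared earlier in expr.h.
    using namespace VideoCommon::Shader;

    Expr BuildGuard() {
        const Expr v0 = MakeExpr<ExprVar>(0);
        const Expr v1 = MakeExpr<ExprVar>(1);
        // The MakeExpr* factories may fold trivial cases (their definitions live
        // in expr.cpp, not shown here); otherwise they allocate fresh nodes.
        return MakeExprOr(MakeExprAnd(v0, MakeExprNot(v1)),
                          MakeExpr<ExprBoolean>(false));
    }
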
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp deleted file mode 100644 index e18ccba8e..000000000 --- a/src/video_core/shader/memory_util.cpp +++ /dev/null | |||
| @@ -1,76 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | |||
| 8 | #include <boost/container_hash/hash.hpp> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/memory_manager.h" | ||
| 14 | #include "video_core/shader/memory_util.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | |||
| 19 | GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 20 | Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { | ||
| 21 | const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; | ||
| 22 | return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; | ||
| 23 | } | ||
| 24 | |||
| 25 | bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | ||
| 26 | // A sched instruction appears once every four instructions, counting from the main offset. | ||
| 27 | constexpr std::size_t SchedPeriod = 4; | ||
| 28 | const std::size_t absolute_offset = offset - main_offset; | ||
| 29 | return (absolute_offset % SchedPeriod) == 0; | ||
| 30 | } | ||
| 31 | |||
| 32 | std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { | ||
| 33 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 34 | // shaders end with one. | ||
| 35 | static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; | ||
| 36 | static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; | ||
| 37 | |||
| 38 | const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 39 | std::size_t offset = start_offset; | ||
| 40 | while (offset < program.size()) { | ||
| 41 | const u64 instruction = program[offset]; | ||
| 42 | if (!IsSchedInstruction(offset, start_offset)) { | ||
| 43 | if ((instruction & MASK) == SELF_JUMPING_BRANCH) { | ||
| 44 | // End on Maxwell's "nop" instruction | ||
| 45 | break; | ||
| 46 | } | ||
| 47 | if (instruction == 0) { | ||
| 48 | break; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | ++offset; | ||
| 52 | } | ||
| 53 | // The last instruction is included in the program size | ||
| 54 | return std::min(offset + 1, program.size()); | ||
| 55 | } | ||
| 56 | |||
| 57 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, | ||
| 58 | const u8* host_ptr, bool is_compute) { | ||
| 59 | ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); | ||
| 60 | ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); | ||
| 61 | memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); | ||
| 62 | code.resize(CalculateProgramSize(code, is_compute)); | ||
| 63 | return code; | ||
| 64 | } | ||
| 65 | |||
| 66 | u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, | ||
| 67 | const ProgramCode& code_b) { | ||
| 68 | size_t unique_identifier = boost::hash_value(code); | ||
| 69 | if (is_a) { | ||
| 70 | // VertexA programs include two programs | ||
| 71 | boost::hash_combine(unique_identifier, boost::hash_value(code_b)); | ||
| 72 | } | ||
| 73 | return static_cast<u64>(unique_identifier); | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace VideoCommon::Shader | ||
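
IsSchedInstruction above encodes Maxwell's instruction layout: with STAGE_MAIN_OFFSET = 10, offsets 10, 14, 18, ... hold sched control words and the three words after each hold real instructions. A quick check of that cadence (hypothetical test code, not part of the deleted file):

    #include <cassert>
    #include <cstddef>

    void SchedCadenceExample() {
        constexpr std::size_t main_offset = 10; // STAGE_MAIN_OFFSET for stage shaders
        assert(IsSchedInstruction(10, main_offset));  // first word is a sched word
        assert(!IsSchedInstruction(11, main_offset)); // followed by three instructions
        assert(!IsSchedInstruction(12, main_offset));
        assert(!IsSchedInstruction(13, main_offset));
        assert(IsSchedInstruction(14, main_offset));  // cadence repeats every four words
    }
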
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h deleted file mode 100644 index 4624d38e6..000000000 --- a/src/video_core/shader/memory_util.h +++ /dev/null | |||
| @@ -1,43 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | |||
| 14 | namespace Tegra { | ||
| 15 | class MemoryManager; | ||
| 16 | } | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using ProgramCode = std::vector<u64>; | ||
| 21 | |||
| 22 | constexpr u32 STAGE_MAIN_OFFSET = 10; | ||
| 23 | constexpr u32 KERNEL_MAIN_OFFSET = 0; | ||
| 24 | |||
| 25 | /// Gets the address for the specified shader stage program | ||
| 26 | GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 27 | Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); | ||
| 28 | |||
| 29 | /// Returns whether the given instruction offset holds a scheduler (sched) instruction | ||
| 30 | bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); | ||
| 31 | |||
| 32 | /// Calculates the size of a program stream | ||
| 33 | std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); | ||
| 34 | |||
| 35 | /// Gets the shader program code from memory for the specified address | ||
| 36 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, | ||
| 37 | const u8* host_ptr, bool is_compute); | ||
| 38 | |||
| 39 | /// Hashes one (or two) program streams | ||
| 40 | u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, | ||
| 41 | const ProgramCode& code_b = {}); | ||
| 42 | |||
| 43 | } // namespace VideoCommon::Shader | ||
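
These declarations were consumed together whenever a shader had to be looked up: resolve the address, fetch and trim the raw words, then hash the result for the disk cache. A hedged sketch of that flow (the caller and the GetPointer accessor are assumptions, not shown in this diff):

    using namespace VideoCommon::Shader;

    u64 FetchAndHashVertexShader(Tegra::MemoryManager& memory_manager,
                                 Tegra::Engines::Maxwell3D& maxwell3d) {
        const GPUVAddr addr = GetShaderAddress(
            maxwell3d, Tegra::Engines::Maxwell3D::Regs::ShaderProgram::VertexB);
        const u8* const host_ptr = memory_manager.GetPointer(addr); // assumed accessor
        const ProgramCode code = GetShaderCode(memory_manager, addr, host_ptr, false);
        return GetUniqueIdentifier(Tegra::Engines::ShaderType::Vertex, false, code);
    }
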
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null | |||
| @@ -1,701 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <memory> | ||
| 10 | #include <optional> | ||
| 11 | #include <string> | ||
| 12 | #include <tuple> | ||
| 13 | #include <utility> | ||
| 14 | #include <variant> | ||
| 15 | #include <vector> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/engines/shader_bytecode.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | enum class OperationCode { | ||
| 23 | Assign, /// (float& dest, float src) -> void | ||
| 24 | |||
| 25 | Select, /// (MetaArithmetic, bool pred, float a, float b) -> float | ||
| 26 | |||
| 27 | FAdd, /// (MetaArithmetic, float a, float b) -> float | ||
| 28 | FMul, /// (MetaArithmetic, float a, float b) -> float | ||
| 29 | FDiv, /// (MetaArithmetic, float a, float b) -> float | ||
| 30 | FFma, /// (MetaArithmetic, float a, float b, float c) -> float | ||
| 31 | FNegate, /// (MetaArithmetic, float a) -> float | ||
| 32 | FAbsolute, /// (MetaArithmetic, float a) -> float | ||
| 33 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float | ||
| 34 | FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 35 | FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 36 | FMin, /// (MetaArithmetic, float a, float b) -> float | ||
| 37 | FMax, /// (MetaArithmetic, float a, float b) -> float | ||
| 38 | FCos, /// (MetaArithmetic, float a) -> float | ||
| 39 | FSin, /// (MetaArithmetic, float a) -> float | ||
| 40 | FExp2, /// (MetaArithmetic, float a) -> float | ||
| 41 | FLog2, /// (MetaArithmetic, float a) -> float | ||
| 42 | FInverseSqrt, /// (MetaArithmetic, float a) -> float | ||
| 43 | FSqrt, /// (MetaArithmetic, float a) -> float | ||
| 44 | FRoundEven, /// (MetaArithmetic, float a) -> float | ||
| 45 | FFloor, /// (MetaArithmetic, float a) -> float | ||
| 46 | FCeil, /// (MetaArithmetic, float a) -> float | ||
| 47 | FTrunc, /// (MetaArithmetic, float a) -> float | ||
| 48 | FCastInteger, /// (MetaArithmetic, int a) -> float | ||
| 49 | FCastUInteger, /// (MetaArithmetic, uint a) -> float | ||
| 50 | FSwizzleAdd, /// (float a, float b, uint mask) -> float | ||
| 51 | |||
| 52 | IAdd, /// (MetaArithmetic, int a, int b) -> int | ||
| 53 | IMul, /// (MetaArithmetic, int a, int b) -> int | ||
| 54 | IDiv, /// (MetaArithmetic, int a, int b) -> int | ||
| 55 | INegate, /// (MetaArithmetic, int a) -> int | ||
| 56 | IAbsolute, /// (MetaArithmetic, int a) -> int | ||
| 57 | IMin, /// (MetaArithmetic, int a, int b) -> int | ||
| 58 | IMax, /// (MetaArithmetic, int a, int b) -> int | ||
| 59 | ICastFloat, /// (MetaArithmetic, float a) -> int | ||
| 60 | ICastUnsigned, /// (MetaArithmetic, uint a) -> int | ||
| 61 | ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int | ||
| 62 | ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int | ||
| 63 | IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int | ||
| 64 | IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int | ||
| 65 | IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int | ||
| 66 | IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int | ||
| 67 | IBitwiseNot, /// (MetaArithmetic, int a) -> int | ||
| 68 | IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int | ||
| 69 | IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int bits) -> int | ||
| 70 | IBitCount, /// (MetaArithmetic, int) -> int | ||
| 71 | IBitMSB, /// (MetaArithmetic, int) -> int | ||
| 72 | |||
| 73 | UAdd, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 74 | UMul, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 75 | UDiv, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 76 | UMin, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 77 | UMax, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 78 | UCastFloat, /// (MetaArithmetic, float a) -> uint | ||
| 79 | UCastSigned, /// (MetaArithmetic, int a) -> uint | ||
| 80 | ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 81 | ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 82 | UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 83 | UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 84 | UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 85 | UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 86 | UBitwiseNot, /// (MetaArithmetic, uint a) -> uint | ||
| 87 | UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint | ||
| 88 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint | ||
| 89 | UBitCount, /// (MetaArithmetic, uint) -> uint | ||
| 90 | UBitMSB, /// (MetaArithmetic, uint) -> uint | ||
| 91 | |||
| 92 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | ||
| 93 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | ||
| 94 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | ||
| 95 | HAbsolute, /// (f16vec2 a) -> f16vec2 | ||
| 96 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | ||
| 97 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 | ||
| 98 | HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 | ||
| 99 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 | ||
| 100 | HMergeF32, /// (f16vec2 src) -> float | ||
| 101 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | ||
| 102 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | ||
| 103 | HPack2, /// (float a, float b) -> f16vec2 | ||
| 104 | |||
| 105 | LogicalAssign, /// (bool& dst, bool src) -> void | ||
| 106 | LogicalAnd, /// (bool a, bool b) -> bool | ||
| 107 | LogicalOr, /// (bool a, bool b) -> bool | ||
| 108 | LogicalXor, /// (bool a, bool b) -> bool | ||
| 109 | LogicalNegate, /// (bool a) -> bool | ||
| 110 | LogicalPick2, /// (bool2 pair, uint index) -> bool | ||
| 111 | LogicalAnd2, /// (bool2 a) -> bool | ||
| 112 | |||
| 113 | LogicalFOrdLessThan, /// (float a, float b) -> bool | ||
| 114 | LogicalFOrdEqual, /// (float a, float b) -> bool | ||
| 115 | LogicalFOrdLessEqual, /// (float a, float b) -> bool | ||
| 116 | LogicalFOrdGreaterThan, /// (float a, float b) -> bool | ||
| 117 | LogicalFOrdNotEqual, /// (float a, float b) -> bool | ||
| 118 | LogicalFOrdGreaterEqual, /// (float a, float b) -> bool | ||
| 119 | LogicalFOrdered, /// (float a, float b) -> bool | ||
| 120 | LogicalFUnordered, /// (float a, float b) -> bool | ||
| 121 | LogicalFUnordLessThan, /// (float a, float b) -> bool | ||
| 122 | LogicalFUnordEqual, /// (float a, float b) -> bool | ||
| 123 | LogicalFUnordLessEqual, /// (float a, float b) -> bool | ||
| 124 | LogicalFUnordGreaterThan, /// (float a, float b) -> bool | ||
| 125 | LogicalFUnordNotEqual, /// (float a, float b) -> bool | ||
| 126 | LogicalFUnordGreaterEqual, /// (float a, float b) -> bool | ||
| 127 | |||
| 128 | LogicalILessThan, /// (int a, int b) -> bool | ||
| 129 | LogicalIEqual, /// (int a, int b) -> bool | ||
| 130 | LogicalILessEqual, /// (int a, int b) -> bool | ||
| 131 | LogicalIGreaterThan, /// (int a, int b) -> bool | ||
| 132 | LogicalINotEqual, /// (int a, int b) -> bool | ||
| 133 | LogicalIGreaterEqual, /// (int a, int b) -> bool | ||
| 134 | |||
| 135 | LogicalULessThan, /// (uint a, uint b) -> bool | ||
| 136 | LogicalUEqual, /// (uint a, uint b) -> bool | ||
| 137 | LogicalULessEqual, /// (uint a, uint b) -> bool | ||
| 138 | LogicalUGreaterThan, /// (uint a, uint b) -> bool | ||
| 139 | LogicalUNotEqual, /// (uint a, uint b) -> bool | ||
| 140 | LogicalUGreaterEqual, /// (uint a, uint b) -> bool | ||
| 141 | |||
| 142 | LogicalAddCarry, /// (uint a, uint b) -> bool | ||
| 143 | |||
| 144 | Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 145 | Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 146 | Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 147 | Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 148 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 149 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 150 | Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 151 | Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 152 | Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 153 | Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 154 | Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 155 | Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | ||
| 156 | |||
| 157 | Texture, /// (MetaTexture, float[N] coords) -> float4 | ||
| 158 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 | ||
| 159 | TextureGather, /// (MetaTexture, float[N] coords) -> float4 | ||
| 160 | TextureQueryDimensions, /// (MetaTexture, float a) -> float4 | ||
| 161 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | ||
| 162 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 | ||
| 163 | TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 | ||
| 164 | |||
| 165 | ImageLoad, /// (MetaImage, int[N] coords) -> void | ||
| 166 | ImageStore, /// (MetaImage, int[N] coords) -> void | ||
| 167 | |||
| 168 | AtomicImageAdd, /// (MetaImage, int[N] coords) -> void | ||
| 169 | AtomicImageAnd, /// (MetaImage, int[N] coords) -> void | ||
| 170 | AtomicImageOr, /// (MetaImage, int[N] coords) -> void | ||
| 171 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | ||
| 172 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void | ||
| 173 | |||
| 174 | AtomicUExchange, /// (memory, uint) -> uint | ||
| 175 | AtomicUAdd, /// (memory, uint) -> uint | ||
| 176 | AtomicUMin, /// (memory, uint) -> uint | ||
| 177 | AtomicUMax, /// (memory, uint) -> uint | ||
| 178 | AtomicUAnd, /// (memory, uint) -> uint | ||
| 179 | AtomicUOr, /// (memory, uint) -> uint | ||
| 180 | AtomicUXor, /// (memory, uint) -> uint | ||
| 181 | |||
| 182 | AtomicIExchange, /// (memory, int) -> int | ||
| 183 | AtomicIAdd, /// (memory, int) -> int | ||
| 184 | AtomicIMin, /// (memory, int) -> int | ||
| 185 | AtomicIMax, /// (memory, int) -> int | ||
| 186 | AtomicIAnd, /// (memory, int) -> int | ||
| 187 | AtomicIOr, /// (memory, int) -> int | ||
| 188 | AtomicIXor, /// (memory, int) -> int | ||
| 189 | |||
| 190 | ReduceUAdd, /// (memory, uint) -> void | ||
| 191 | ReduceUMin, /// (memory, uint) -> void | ||
| 192 | ReduceUMax, /// (memory, uint) -> void | ||
| 193 | ReduceUAnd, /// (memory, uint) -> void | ||
| 194 | ReduceUOr, /// (memory, uint) -> void | ||
| 195 | ReduceUXor, /// (memory, uint) -> void | ||
| 196 | |||
| 197 | ReduceIAdd, /// (memory, int) -> void | ||
| 198 | ReduceIMin, /// (memory, int) -> void | ||
| 199 | ReduceIMax, /// (memory, int) -> void | ||
| 200 | ReduceIAnd, /// (memory, int) -> void | ||
| 201 | ReduceIOr, /// (memory, int) -> void | ||
| 202 | ReduceIXor, /// (memory, int) -> void | ||
| 203 | |||
| 204 | Branch, /// (uint branch_target) -> void | ||
| 205 | BranchIndirect, /// (uint branch_target) -> void | ||
| 206 | PushFlowStack, /// (uint branch_target) -> void | ||
| 207 | PopFlowStack, /// () -> void | ||
| 208 | Exit, /// () -> void | ||
| 209 | Discard, /// () -> void | ||
| 210 | |||
| 211 | EmitVertex, /// () -> void | ||
| 212 | EndPrimitive, /// () -> void | ||
| 213 | |||
| 214 | InvocationId, /// () -> int | ||
| 215 | YNegate, /// () -> float | ||
| 216 | LocalInvocationIdX, /// () -> uint | ||
| 217 | LocalInvocationIdY, /// () -> uint | ||
| 218 | LocalInvocationIdZ, /// () -> uint | ||
| 219 | WorkGroupIdX, /// () -> uint | ||
| 220 | WorkGroupIdY, /// () -> uint | ||
| 221 | WorkGroupIdZ, /// () -> uint | ||
| 222 | |||
| 223 | BallotThread, /// (bool) -> uint | ||
| 224 | VoteAll, /// (bool) -> bool | ||
| 225 | VoteAny, /// (bool) -> bool | ||
| 226 | VoteEqual, /// (bool) -> bool | ||
| 227 | |||
| 228 | ThreadId, /// () -> uint | ||
| 229 | ThreadEqMask, /// () -> uint | ||
| 230 | ThreadGeMask, /// () -> uint | ||
| 231 | ThreadGtMask, /// () -> uint | ||
| 232 | ThreadLeMask, /// () -> uint | ||
| 233 | ThreadLtMask, /// () -> uint | ||
| 234 | ShuffleIndexed, /// (uint value, uint index) -> uint | ||
| 235 | |||
| 236 | Barrier, /// () -> void | ||
| 237 | MemoryBarrierGroup, /// () -> void | ||
| 238 | MemoryBarrierGlobal, /// () -> void | ||
| 239 | |||
| 240 | Amount, | ||
| 241 | }; | ||
| 242 | |||
| 243 | enum class InternalFlag { | ||
| 244 | Zero = 0, | ||
| 245 | Sign = 1, | ||
| 246 | Carry = 2, | ||
| 247 | Overflow = 3, | ||
| 248 | Amount = 4, | ||
| 249 | }; | ||
| 250 | |||
| 251 | enum class MetaStackClass { | ||
| 252 | Ssy, | ||
| 253 | Pbk, | ||
| 254 | }; | ||
| 255 | |||
| 256 | class OperationNode; | ||
| 257 | class ConditionalNode; | ||
| 258 | class GprNode; | ||
| 259 | class CustomVarNode; | ||
| 260 | class ImmediateNode; | ||
| 261 | class InternalFlagNode; | ||
| 262 | class PredicateNode; | ||
| 263 | class AbufNode; | ||
| 264 | class CbufNode; | ||
| 265 | class LmemNode; | ||
| 266 | class PatchNode; | ||
| 267 | class SmemNode; | ||
| 268 | class GmemNode; | ||
| 269 | class CommentNode; | ||
| 270 | |||
| 271 | using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, | ||
| 272 | InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, | ||
| 273 | LmemNode, SmemNode, GmemNode, CommentNode>; | ||
| 274 | using Node = std::shared_ptr<NodeData>; | ||
| 275 | using Node4 = std::array<Node, 4>; | ||
| 276 | using NodeBlock = std::vector<Node>; | ||
| 277 | |||
| 278 | struct ArraySamplerNode; | ||
| 279 | struct BindlessSamplerNode; | ||
| 280 | struct SeparateSamplerNode; | ||
| 281 | |||
| 282 | using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; | ||
| 283 | using TrackSampler = std::shared_ptr<TrackSamplerData>; | ||
| 284 | |||
| 285 | struct SamplerEntry { | ||
| 286 | /// Bound samplers constructor | ||
| 287 | explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, | ||
| 288 | bool is_shadow_, bool is_buffer_, bool is_indexed_) | ||
| 289 | : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, | ||
| 290 | is_buffer{is_buffer_}, is_indexed{is_indexed_} {} | ||
| 291 | |||
| 292 | /// Separate sampler constructor | ||
| 293 | explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, | ||
| 294 | Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, | ||
| 295 | bool is_buffer_) | ||
| 296 | : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, | ||
| 297 | buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, | ||
| 298 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} | ||
| 299 | |||
| 300 | /// Bindless samplers constructor | ||
| 301 | explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, | ||
| 302 | bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) | ||
| 303 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, | ||
| 304 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { | ||
| 305 | } | ||
| 306 | |||
| 307 | u32 index = 0; ///< Emulated index given to this sampler. | ||
| 308 | u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. | ||
| 309 | u32 secondary_offset = 0; ///< Secondary offset in the const buffer. | ||
| 310 | u32 buffer = 0; ///< Buffer where the bindless sampler is read. | ||
| 311 | u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. | ||
| 312 | u32 size = 1; ///< Size of the sampler. | ||
| 313 | |||
| 314 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) | ||
| 315 | bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. | ||
| 316 | bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. | ||
| 317 | bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. | ||
| 318 | bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. | ||
| 319 | bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. | ||
| 320 | bool is_separated = false; ///< Whether the image and sampler are separate or not. | ||
| 321 | }; | ||
| 322 | |||
| 323 | /// Represents a tracked array of bindless samplers inside a direct const buffer | ||
| 324 | struct ArraySamplerNode { | ||
| 325 | u32 index; | ||
| 326 | u32 base_offset; | ||
| 327 | u32 bindless_var; | ||
| 328 | }; | ||
| 329 | |||
| 330 | /// Represents a tracked separate sampler image pair that was folded statically | ||
| 331 | struct SeparateSamplerNode { | ||
| 332 | std::pair<u32, u32> indices; | ||
| 333 | std::pair<u32, u32> offsets; | ||
| 334 | }; | ||
| 335 | |||
| 336 | /// Represents a tracked bindless sampler into a direct const buffer | ||
| 337 | struct BindlessSamplerNode { | ||
| 338 | u32 index; | ||
| 339 | u32 offset; | ||
| 340 | }; | ||
| 341 | |||
| 342 | struct ImageEntry { | ||
| 343 | public: | ||
| 344 | /// Bound images constructor | ||
| 345 | explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) | ||
| 346 | : index{index_}, offset{offset_}, type{type_} {} | ||
| 347 | |||
| 348 | /// Bindless images constructor | ||
| 349 | explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) | ||
| 350 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} | ||
| 351 | |||
| 352 | void MarkWrite() { | ||
| 353 | is_written = true; | ||
| 354 | } | ||
| 355 | |||
| 356 | void MarkRead() { | ||
| 357 | is_read = true; | ||
| 358 | } | ||
| 359 | |||
| 360 | void MarkAtomic() { | ||
| 361 | MarkWrite(); | ||
| 362 | MarkRead(); | ||
| 363 | is_atomic = true; | ||
| 364 | } | ||
| 365 | |||
| 366 | u32 index = 0; | ||
| 367 | u32 offset = 0; | ||
| 368 | u32 buffer = 0; | ||
| 369 | |||
| 370 | Tegra::Shader::ImageType type{}; | ||
| 371 | bool is_bindless = false; | ||
| 372 | bool is_written = false; | ||
| 373 | bool is_read = false; | ||
| 374 | bool is_atomic = false; | ||
| 375 | }; | ||
| 376 | |||
| 377 | struct GlobalMemoryBase { | ||
| 378 | u32 cbuf_index = 0; | ||
| 379 | u32 cbuf_offset = 0; | ||
| 380 | |||
| 381 | [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { | ||
| 382 | return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); | ||
| 383 | } | ||
| 384 | }; | ||
| 385 | |||
| 386 | /// Parameters describing an arithmetic operation | ||
| 387 | struct MetaArithmetic { | ||
| 388 | bool precise{}; ///< Whether the operation must be kept precise (cannot be relaxed) | ||
| 389 | }; | ||
| 390 | |||
| 391 | /// Parameters describing a texture sampler | ||
| 392 | struct MetaTexture { | ||
| 393 | SamplerEntry sampler; | ||
| 394 | Node array; | ||
| 395 | Node depth_compare; | ||
| 396 | std::vector<Node> aoffi; | ||
| 397 | std::vector<Node> ptp; | ||
| 398 | std::vector<Node> derivates; | ||
| 399 | Node bias; | ||
| 400 | Node lod; | ||
| 401 | Node component; | ||
| 402 | u32 element{}; | ||
| 403 | Node index; | ||
| 404 | }; | ||
| 405 | |||
| 406 | struct MetaImage { | ||
| 407 | const ImageEntry& image; | ||
| 408 | std::vector<Node> values; | ||
| 409 | u32 element{}; | ||
| 410 | }; | ||
| 411 | |||
| 412 | /// Parameters that modify an operation but are not part of any particular operand | ||
| 413 | using Meta = | ||
| 414 | std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; | ||
| 415 | |||
| 416 | class AmendNode { | ||
| 417 | public: | ||
| 418 | [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const { | ||
| 419 | if (amend_index == amend_null_index) { | ||
| 420 | return std::nullopt; | ||
| 421 | } | ||
| 422 | return {amend_index}; | ||
| 423 | } | ||
| 424 | |||
| 425 | void SetAmendIndex(std::size_t index) { | ||
| 426 | amend_index = index; | ||
| 427 | } | ||
| 428 | |||
| 429 | void ClearAmend() { | ||
| 430 | amend_index = amend_null_index; | ||
| 431 | } | ||
| 432 | |||
| 433 | private: | ||
| 434 | static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; | ||
| 435 | std::size_t amend_index{amend_null_index}; | ||
| 436 | }; | ||
| 437 | |||
| 438 | /// Holds any kind of operation that can be done in the IR | ||
| 439 | class OperationNode final : public AmendNode { | ||
| 440 | public: | ||
| 441 | explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} | ||
| 442 | |||
| 443 | explicit OperationNode(OperationCode code_, Meta meta_) | ||
| 444 | : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {} | ||
| 445 | |||
| 446 | explicit OperationNode(OperationCode code_, std::vector<Node> operands_) | ||
| 447 | : OperationNode(code_, Meta{}, std::move(operands_)) {} | ||
| 448 | |||
| 449 | explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_) | ||
| 450 | : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} | ||
| 451 | |||
| 452 | template <typename... Args> | ||
| 453 | explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_) | ||
| 454 | : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} | ||
| 455 | |||
| 456 | [[nodiscard]] OperationCode GetCode() const { | ||
| 457 | return code; | ||
| 458 | } | ||
| 459 | |||
| 460 | [[nodiscard]] const Meta& GetMeta() const { | ||
| 461 | return meta; | ||
| 462 | } | ||
| 463 | |||
| 464 | [[nodiscard]] std::size_t GetOperandsCount() const { | ||
| 465 | return operands.size(); | ||
| 466 | } | ||
| 467 | |||
| 468 | [[nodiscard]] const Node& operator[](std::size_t operand_index) const { | ||
| 469 | return operands.at(operand_index); | ||
| 470 | } | ||
| 471 | |||
| 472 | private: | ||
| 473 | OperationCode code{}; | ||
| 474 | Meta meta{}; | ||
| 475 | std::vector<Node> operands; | ||
| 476 | }; | ||
| 477 | |||
| 478 | /// Encloses conditionally-executed code guarded by a node that returns a boolean | ||
| 479 | class ConditionalNode final : public AmendNode { | ||
| 480 | public: | ||
| 481 | explicit ConditionalNode(Node condition_, std::vector<Node>&& code_) | ||
| 482 | : condition{std::move(condition_)}, code{std::move(code_)} {} | ||
| 483 | |||
| 484 | [[nodiscard]] const Node& GetCondition() const { | ||
| 485 | return condition; | ||
| 486 | } | ||
| 487 | |||
| 488 | [[nodiscard]] const std::vector<Node>& GetCode() const { | ||
| 489 | return code; | ||
| 490 | } | ||
| 491 | |||
| 492 | private: | ||
| 493 | Node condition; ///< Condition to be satisfied | ||
| 494 | std::vector<Node> code; ///< Code to execute | ||
| 495 | }; | ||
| 496 | |||
| 497 | /// A general purpose register | ||
| 498 | class GprNode final { | ||
| 499 | public: | ||
| 500 | explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} | ||
| 501 | |||
| 502 | [[nodiscard]] constexpr u32 GetIndex() const { | ||
| 503 | return static_cast<u32>(index); | ||
| 504 | } | ||
| 505 | |||
| 506 | private: | ||
| 507 | Tegra::Shader::Register index{}; | ||
| 508 | }; | ||
| 509 | |||
| 510 | /// A custom variable | ||
| 511 | class CustomVarNode final { | ||
| 512 | public: | ||
| 513 | explicit constexpr CustomVarNode(u32 index_) : index{index_} {} | ||
| 514 | |||
| 515 | [[nodiscard]] constexpr u32 GetIndex() const { | ||
| 516 | return index; | ||
| 517 | } | ||
| 518 | |||
| 519 | private: | ||
| 520 | u32 index{}; | ||
| 521 | }; | ||
| 522 | |||
| 523 | /// A 32-bit immediate value | ||
| 524 | class ImmediateNode final { | ||
| 525 | public: | ||
| 526 | explicit constexpr ImmediateNode(u32 value_) : value{value_} {} | ||
| 527 | |||
| 528 | [[nodiscard]] constexpr u32 GetValue() const { | ||
| 529 | return value; | ||
| 530 | } | ||
| 531 | |||
| 532 | private: | ||
| 533 | u32 value{}; | ||
| 534 | }; | ||
| 535 | |||
| 536 | /// One of Maxwell's internal flags | ||
| 537 | class InternalFlagNode final { | ||
| 538 | public: | ||
| 539 | explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} | ||
| 540 | |||
| 541 | [[nodiscard]] constexpr InternalFlag GetFlag() const { | ||
| 542 | return flag; | ||
| 543 | } | ||
| 544 | |||
| 545 | private: | ||
| 546 | InternalFlag flag{}; | ||
| 547 | }; | ||
| 548 | |||
| 549 | /// A predicate register; it can be negated without additional nodes | ||
| 550 | class PredicateNode final { | ||
| 551 | public: | ||
| 552 | explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) | ||
| 553 | : index{index_}, negated{negated_} {} | ||
| 554 | |||
| 555 | [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { | ||
| 556 | return index; | ||
| 557 | } | ||
| 558 | |||
| 559 | [[nodiscard]] constexpr bool IsNegated() const { | ||
| 560 | return negated; | ||
| 561 | } | ||
| 562 | |||
| 563 | private: | ||
| 564 | Tegra::Shader::Pred index{}; | ||
| 565 | bool negated{}; | ||
| 566 | }; | ||
| 567 | |||
| 568 | /// Attribute buffer memory (known as attributes or varyings in GLSL terms) | ||
| 569 | class AbufNode final { | ||
| 570 | public: | ||
| 571 | // Initialize for standard attributes (index is explicit). | ||
| 572 | explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) | ||
| 573 | : buffer{std::move(buffer_)}, index{index_}, element{element_} {} | ||
| 574 | |||
| 575 | // Initialize for physical attributes (index is a variable value). | ||
| 576 | explicit AbufNode(Node physical_address_, Node buffer_ = {}) | ||
| 577 | : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} | ||
| 578 | |||
| 579 | [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { | ||
| 580 | return index; | ||
| 581 | } | ||
| 582 | |||
| 583 | [[nodiscard]] u32 GetElement() const { | ||
| 584 | return element; | ||
| 585 | } | ||
| 586 | |||
| 587 | [[nodiscard]] const Node& GetBuffer() const { | ||
| 588 | return buffer; | ||
| 589 | } | ||
| 590 | |||
| 591 | [[nodiscard]] bool IsPhysicalBuffer() const { | ||
| 592 | return static_cast<bool>(physical_address); | ||
| 593 | } | ||
| 594 | |||
| 595 | [[nodiscard]] const Node& GetPhysicalAddress() const { | ||
| 596 | return physical_address; | ||
| 597 | } | ||
| 598 | |||
| 599 | private: | ||
| 600 | Node physical_address; | ||
| 601 | Node buffer; | ||
| 602 | Tegra::Shader::Attribute::Index index{}; | ||
| 603 | u32 element{}; | ||
| 604 | }; | ||
| 605 | |||
| 606 | /// Patch memory (used to communicate between tessellation stages). | ||
| 607 | class PatchNode final { | ||
| 608 | public: | ||
| 609 | explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} | ||
| 610 | |||
| 611 | [[nodiscard]] constexpr u32 GetOffset() const { | ||
| 612 | return offset; | ||
| 613 | } | ||
| 614 | |||
| 615 | private: | ||
| 616 | u32 offset{}; | ||
| 617 | }; | ||
| 618 | |||
| 619 | /// Constant buffer node, usually mapped to uniform buffers in GLSL | ||
| 620 | class CbufNode final { | ||
| 621 | public: | ||
| 622 | explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} | ||
| 623 | |||
| 624 | [[nodiscard]] u32 GetIndex() const { | ||
| 625 | return index; | ||
| 626 | } | ||
| 627 | |||
| 628 | [[nodiscard]] const Node& GetOffset() const { | ||
| 629 | return offset; | ||
| 630 | } | ||
| 631 | |||
| 632 | private: | ||
| 633 | u32 index{}; | ||
| 634 | Node offset; | ||
| 635 | }; | ||
| 636 | |||
| 637 | /// Local memory node | ||
| 638 | class LmemNode final { | ||
| 639 | public: | ||
| 640 | explicit LmemNode(Node address_) : address{std::move(address_)} {} | ||
| 641 | |||
| 642 | [[nodiscard]] const Node& GetAddress() const { | ||
| 643 | return address; | ||
| 644 | } | ||
| 645 | |||
| 646 | private: | ||
| 647 | Node address; | ||
| 648 | }; | ||
| 649 | |||
| 650 | /// Shared memory node | ||
| 651 | class SmemNode final { | ||
| 652 | public: | ||
| 653 | explicit SmemNode(Node address_) : address{std::move(address_)} {} | ||
| 654 | |||
| 655 | [[nodiscard]] const Node& GetAddress() const { | ||
| 656 | return address; | ||
| 657 | } | ||
| 658 | |||
| 659 | private: | ||
| 660 | Node address; | ||
| 661 | }; | ||
| 662 | |||
| 663 | /// Global memory node | ||
| 664 | class GmemNode final { | ||
| 665 | public: | ||
| 666 | explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) | ||
| 667 | : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, | ||
| 668 | descriptor{descriptor_} {} | ||
| 669 | |||
| 670 | [[nodiscard]] const Node& GetRealAddress() const { | ||
| 671 | return real_address; | ||
| 672 | } | ||
| 673 | |||
| 674 | [[nodiscard]] const Node& GetBaseAddress() const { | ||
| 675 | return base_address; | ||
| 676 | } | ||
| 677 | |||
| 678 | [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { | ||
| 679 | return descriptor; | ||
| 680 | } | ||
| 681 | |||
| 682 | private: | ||
| 683 | Node real_address; | ||
| 684 | Node base_address; | ||
| 685 | GlobalMemoryBase descriptor; | ||
| 686 | }; | ||
| 687 | |||
| 688 | /// Commentary, can be dropped | ||
| 689 | class CommentNode final { | ||
| 690 | public: | ||
| 691 | explicit CommentNode(std::string text_) : text{std::move(text_)} {} | ||
| 692 | |||
| 693 | [[nodiscard]] const std::string& GetText() const { | ||
| 694 | return text; | ||
| 695 | } | ||
| 696 | |||
| 697 | private: | ||
| 698 | std::string text; | ||
| 699 | }; | ||
| 700 | |||
| 701 | } // namespace VideoCommon::Shader | ||
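
Because NodeData is a std::variant over all of these node classes, consumers recover the concrete type with std::get_if or std::visit rather than virtual dispatch. A small sketch of that pattern (the helper name is hypothetical):

    #include <variant>

    using VideoCommon::Shader::ImmediateNode;
    using VideoCommon::Shader::Node;

    bool IsZeroImmediate(const Node& node) {
        // node is a shared_ptr<NodeData>; get_if returns nullptr for other alternatives.
        if (const auto* imm = std::get_if<ImmediateNode>(node.get())) {
            return imm->GetValue() == 0;
        }
        return false;
    }
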
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | Node Conditional(Node condition, std::vector<Node> code) { | ||
| 15 | return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); | ||
| 16 | } | ||
| 17 | |||
| 18 | Node Comment(std::string text) { | ||
| 19 | return MakeNode<CommentNode>(std::move(text)); | ||
| 20 | } | ||
| 21 | |||
| 22 | Node Immediate(u32 value) { | ||
| 23 | return MakeNode<ImmediateNode>(value); | ||
| 24 | } | ||
| 25 | |||
| 26 | Node Immediate(s32 value) { | ||
| 27 | return Immediate(static_cast<u32>(value)); | ||
| 28 | } | ||
| 29 | |||
| 30 | Node Immediate(f32 value) { | ||
| 31 | u32 integral; | ||
| 32 | std::memcpy(&integral, &value, sizeof(u32)); | ||
| 33 | return Immediate(integral); | ||
| 34 | } | ||
| 35 | |||
| 36 | OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { | ||
| 37 | if (is_signed) { | ||
| 38 | return operation_code; | ||
| 39 | } | ||
| 40 | switch (operation_code) { | ||
| 41 | case OperationCode::FCastInteger: | ||
| 42 | return OperationCode::FCastUInteger; | ||
| 43 | case OperationCode::IAdd: | ||
| 44 | return OperationCode::UAdd; | ||
| 45 | case OperationCode::IMul: | ||
| 46 | return OperationCode::UMul; | ||
| 47 | case OperationCode::IDiv: | ||
| 48 | return OperationCode::UDiv; | ||
| 49 | case OperationCode::IMin: | ||
| 50 | return OperationCode::UMin; | ||
| 51 | case OperationCode::IMax: | ||
| 52 | return OperationCode::UMax; | ||
| 53 | case OperationCode::ICastFloat: | ||
| 54 | return OperationCode::UCastFloat; | ||
| 55 | case OperationCode::ICastUnsigned: | ||
| 56 | return OperationCode::UCastSigned; | ||
| 57 | case OperationCode::ILogicalShiftLeft: | ||
| 58 | return OperationCode::ULogicalShiftLeft; | ||
| 59 | case OperationCode::ILogicalShiftRight: | ||
| 60 | return OperationCode::ULogicalShiftRight; | ||
| 61 | case OperationCode::IArithmeticShiftRight: | ||
| 62 | return OperationCode::UArithmeticShiftRight; | ||
| 63 | case OperationCode::IBitwiseAnd: | ||
| 64 | return OperationCode::UBitwiseAnd; | ||
| 65 | case OperationCode::IBitwiseOr: | ||
| 66 | return OperationCode::UBitwiseOr; | ||
| 67 | case OperationCode::IBitwiseXor: | ||
| 68 | return OperationCode::UBitwiseXor; | ||
| 69 | case OperationCode::IBitwiseNot: | ||
| 70 | return OperationCode::UBitwiseNot; | ||
| 71 | case OperationCode::IBitfieldExtract: | ||
| 72 | return OperationCode::UBitfieldExtract; | ||
| 73 | case OperationCode::IBitfieldInsert: | ||
| 74 | return OperationCode::UBitfieldInsert; | ||
| 75 | case OperationCode::IBitCount: | ||
| 76 | return OperationCode::UBitCount; | ||
| 77 | case OperationCode::LogicalILessThan: | ||
| 78 | return OperationCode::LogicalULessThan; | ||
| 79 | case OperationCode::LogicalIEqual: | ||
| 80 | return OperationCode::LogicalUEqual; | ||
| 81 | case OperationCode::LogicalILessEqual: | ||
| 82 | return OperationCode::LogicalULessEqual; | ||
| 83 | case OperationCode::LogicalIGreaterThan: | ||
| 84 | return OperationCode::LogicalUGreaterThan; | ||
| 85 | case OperationCode::LogicalINotEqual: | ||
| 86 | return OperationCode::LogicalUNotEqual; | ||
| 87 | case OperationCode::LogicalIGreaterEqual: | ||
| 88 | return OperationCode::LogicalUGreaterEqual; | ||
| 89 | case OperationCode::AtomicIExchange: | ||
| 90 | return OperationCode::AtomicUExchange; | ||
| 91 | case OperationCode::AtomicIAdd: | ||
| 92 | return OperationCode::AtomicUAdd; | ||
| 93 | case OperationCode::AtomicIMin: | ||
| 94 | return OperationCode::AtomicUMin; | ||
| 95 | case OperationCode::AtomicIMax: | ||
| 96 | return OperationCode::AtomicUMax; | ||
| 97 | case OperationCode::AtomicIAnd: | ||
| 98 | return OperationCode::AtomicUAnd; | ||
| 99 | case OperationCode::AtomicIOr: | ||
| 100 | return OperationCode::AtomicUOr; | ||
| 101 | case OperationCode::AtomicIXor: | ||
| 102 | return OperationCode::AtomicUXor; | ||
| 103 | case OperationCode::INegate: | ||
| 104 | UNREACHABLE_MSG("Can't negate an unsigned integer"); | ||
| 105 | return {}; | ||
| 106 | case OperationCode::IAbsolute: | ||
| 107 | UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); | ||
| 108 | return {}; | ||
| 109 | default: | ||
| 110 | UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); | ||
| 111 | return {}; | ||
| 112 | } | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h deleted file mode 100644 index 1e0886185..000000000 --- a/src/video_core/shader/node_helper.h +++ /dev/null | |||
| @@ -1,71 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <string> | ||
| 9 | #include <tuple> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "video_core/shader/node.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | |||
| 19 | /// This arithmetic operation cannot be optimized away | ||
| 20 | inline constexpr MetaArithmetic PRECISE = {true}; | ||
| 21 | /// This arithmetic operation can be optimized away | ||
| 22 | inline constexpr MetaArithmetic NO_PRECISE = {false}; | ||
| 23 | |||
| 24 | /// Creates a conditional node | ||
| 25 | Node Conditional(Node condition, std::vector<Node> code); | ||
| 26 | |||
| 27 | /// Creates a commentary node | ||
| 28 | Node Comment(std::string text); | ||
| 29 | |||
| 30 | /// Creates a u32 immediate | ||
| 31 | Node Immediate(u32 value); | ||
| 32 | |||
| 33 | /// Creates an s32 immediate | ||
| 34 | Node Immediate(s32 value); | ||
| 35 | |||
| 36 | /// Creates an f32 immediate | ||
| 37 | Node Immediate(f32 value); | ||
| 38 | |||
| 39 | /// Converts a signed operation code to its unsigned counterpart | ||
| 40 | OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); | ||
| 41 | |||
| 42 | template <typename T, typename... Args> | ||
| 43 | Node MakeNode(Args&&... args) { | ||
| 44 | static_assert(std::is_convertible_v<T, NodeData>); | ||
| 45 | return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); | ||
| 46 | } | ||
| 47 | |||
| 48 | template <typename T, typename... Args> | ||
| 49 | TrackSampler MakeTrackSampler(Args&&... args) { | ||
| 50 | static_assert(std::is_convertible_v<T, TrackSamplerData>); | ||
| 51 | return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...}); | ||
| 52 | } | ||
| 53 | |||
| 54 | template <typename... Args> | ||
| 55 | Node Operation(OperationCode code, Args&&... args) { | ||
| 56 | if constexpr (sizeof...(args) == 0) { | ||
| 57 | return MakeNode<OperationNode>(code); | ||
| 58 | } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>, | ||
| 59 | Meta>) { | ||
| 60 | return MakeNode<OperationNode>(code, std::forward<Args>(args)...); | ||
| 61 | } else { | ||
| 62 | return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | template <typename... Args> | ||
| 67 | Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) { | ||
| 68 | return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace VideoCommon::Shader | ||
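
The Operation() overload set above keys on whether its first variadic argument is convertible to Meta: if it is, that argument becomes the operation's Meta; if not, an empty Meta{} is injected. A sketch of both forms plus the signed/unsigned dispatch (the operand nodes and function are assumptions for illustration):

    using namespace VideoCommon::Shader;

    Node Example(Node a, Node b) {
        // First argument convertible to Meta: PRECISE travels as the Meta.
        Node precise_add = Operation(OperationCode::FAdd, PRECISE, a, b);
        // No Meta argument: the helper inserts Meta{} itself.
        Node plain_mul = Operation(OperationCode::FMul, a, b);
        // is_signed == false maps IAdd onto UAdd via SignedToUnsignedCode.
        return SignedOperation(OperationCode::IAdd, false, precise_add, plain_mul);
    }
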
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp deleted file mode 100644 index 148d91fcb..000000000 --- a/src/video_core/shader/registry.cpp +++ /dev/null | |||
| @@ -1,181 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/kepler_compute.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/shader/registry.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Engines::ConstBufferEngineInterface; | ||
| 18 | using Tegra::Engines::SamplerDescriptor; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 24 | if (shader_stage == ShaderType::Compute) { | ||
| 25 | return {}; | ||
| 26 | } | ||
| 27 | |||
| 28 | auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine); | ||
| 29 | |||
| 30 | return { | ||
| 31 | .tfb_layouts = graphics.regs.tfb_layouts, | ||
| 32 | .tfb_varying_locs = graphics.regs.tfb_varying_locs, | ||
| 33 | .primitive_topology = graphics.regs.draw.topology, | ||
| 34 | .tessellation_primitive = graphics.regs.tess_mode.prim, | ||
| 35 | .tessellation_spacing = graphics.regs.tess_mode.spacing, | ||
| 36 | .tfb_enabled = graphics.regs.tfb_enabled != 0, | ||
| 37 | .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, | ||
| 38 | }; | ||
| 39 | } | ||
| 40 | |||
| 41 | ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 42 | if (shader_stage != ShaderType::Compute) { | ||
| 43 | return {}; | ||
| 44 | } | ||
| 45 | |||
| 46 | auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine); | ||
| 47 | const auto& launch = compute.launch_description; | ||
| 48 | |||
| 49 | return { | ||
| 50 | .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, | ||
| 51 | .shared_memory_size_in_words = launch.shared_alloc, | ||
| 52 | .local_memory_size_in_words = launch.local_pos_alloc, | ||
| 53 | }; | ||
| 54 | } | ||
| 55 | |||
| 56 | } // Anonymous namespace | ||
| 57 | |||
| 58 | Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) | ||
| 59 | : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, | ||
| 60 | bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} | ||
| 61 | |||
| 62 | Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) | ||
| 63 | : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, | ||
| 64 | graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( | ||
| 65 | shader_stage, engine_)} {} | ||
| 66 | |||
| 67 | Registry::~Registry() = default; | ||
| 68 | |||
| 69 | std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { | ||
| 70 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 71 | const auto iter = keys.find(key); | ||
| 72 | if (iter != keys.end()) { | ||
| 73 | return iter->second; | ||
| 74 | } | ||
| 75 | if (!engine) { | ||
| 76 | return std::nullopt; | ||
| 77 | } | ||
| 78 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 79 | keys.emplace(key, value); | ||
| 80 | return value; | ||
| 81 | } | ||
| 82 | |||
| 83 | std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { | ||
| 84 | const u32 key = offset; | ||
| 85 | const auto iter = bound_samplers.find(key); | ||
| 86 | if (iter != bound_samplers.end()) { | ||
| 87 | return iter->second; | ||
| 88 | } | ||
| 89 | if (!engine) { | ||
| 90 | return std::nullopt; | ||
| 91 | } | ||
| 92 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 93 | bound_samplers.emplace(key, value); | ||
| 94 | return value; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( | ||
| 98 | std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { | ||
| 99 | SeparateSamplerKey key; | ||
| 100 | key.buffers = buffers; | ||
| 101 | key.offsets = offsets; | ||
| 102 | const auto iter = separate_samplers.find(key); | ||
| 103 | if (iter != separate_samplers.end()) { | ||
| 104 | return iter->second; | ||
| 105 | } | ||
| 106 | if (!engine) { | ||
| 107 | return std::nullopt; | ||
| 108 | } | ||
| 109 | |||
| 110 | const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); | ||
| 111 | const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); | ||
| 112 | const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); | ||
| 113 | separate_samplers.emplace(key, value); | ||
| 114 | return value; | ||
| 115 | } | ||
| 116 | |||
| 117 | std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { | ||
| 118 | const std::pair key = {buffer, offset}; | ||
| 119 | const auto iter = bindless_samplers.find(key); | ||
| 120 | if (iter != bindless_samplers.end()) { | ||
| 121 | return iter->second; | ||
| 122 | } | ||
| 123 | if (!engine) { | ||
| 124 | return std::nullopt; | ||
| 125 | } | ||
| 126 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 127 | bindless_samplers.emplace(key, value); | ||
| 128 | return value; | ||
| 129 | } | ||
| 130 | |||
| 131 | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 132 | keys.insert_or_assign({buffer, offset}, value); | ||
| 133 | } | ||
| 134 | |||
| 135 | void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 136 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 137 | } | ||
| 138 | |||
| 139 | void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 140 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 141 | } | ||
| 142 | |||
| 143 | bool Registry::IsConsistent() const { | ||
| 144 | if (!engine) { | ||
| 145 | return true; | ||
| 146 | } | ||
| 147 | return std::all_of(keys.begin(), keys.end(), | ||
| 148 | [this](const auto& pair) { | ||
| 149 | const auto [cbuf, offset] = pair.first; | ||
| 150 | const auto value = pair.second; | ||
| 151 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 152 | }) && | ||
| 153 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 154 | [this](const auto& sampler) { | ||
| 155 | const auto [key, value] = sampler; | ||
| 156 | return value == engine->AccessBoundSampler(stage, key); | ||
| 157 | }) && | ||
| 158 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 159 | [this](const auto& sampler) { | ||
| 160 | const auto [cbuf, offset] = sampler.first; | ||
| 161 | const auto value = sampler.second; | ||
| 162 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 163 | }); | ||
| 164 | } | ||
| 165 | |||
| 166 | bool Registry::HasEqualKeys(const Registry& rhs) const { | ||
| 167 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 168 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 169 | } | ||
| 170 | |||
| 171 | const GraphicsInfo& Registry::GetGraphicsInfo() const { | ||
| 172 | ASSERT(stage != Tegra::Engines::ShaderType::Compute); | ||
| 173 | return graphics_info; | ||
| 174 | } | ||
| 175 | |||
| 176 | const ComputeInfo& Registry::GetComputeInfo() const { | ||
| 177 | ASSERT(stage == Tegra::Engines::ShaderType::Compute); | ||
| 178 | return compute_info; | ||
| 179 | } | ||
| 180 | |||
| 181 | } // namespace VideoCommon::Shader | ||
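
Taken together, the Obtain*/Insert* pairs form a read-through cache: during compilation, values are pulled from the engine and recorded; when a shader comes back from the disk cache, the recorded values are replayed via Insert* and IsConsistent() revalidates them against live GPU state. A hedged sketch of such a revalidation step (the function itself is hypothetical):

    using VideoCommon::Shader::Registry;

    bool CanReuseCachedShader(const Registry& live, const Registry& cached) {
        // The same const-buffer keys and samplers were observed when the shader was built...
        if (!live.HasEqualKeys(cached)) {
            return false;
        }
        // ...and they still match what the engine reports right now.
        return live.IsConsistent();
    }
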
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null | |||
| @@ -1,172 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/hash.h" | ||
| 15 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_type.h" | ||
| 18 | #include "video_core/guest_driver.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | struct SeparateSamplerKey { | ||
| 23 | std::pair<u32, u32> buffers; | ||
| 24 | std::pair<u32, u32> offsets; | ||
| 25 | }; | ||
| 26 | |||
| 27 | } // namespace VideoCommon::Shader | ||
| 28 | |||
| 29 | namespace std { | ||
| 30 | |||
| 31 | template <> | ||
| 32 | struct hash<VideoCommon::Shader::SeparateSamplerKey> { | ||
| 33 | std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { | ||
| 34 | return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ | ||
| 35 | key.offsets.second); | ||
| 36 | } | ||
| 37 | }; | ||
| 38 | |||
| 39 | template <> | ||
| 40 | struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { | ||
| 41 | bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, | ||
| 42 | const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { | ||
| 43 | return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; | ||
| 44 | } | ||
| 45 | }; | ||
| 46 | |||
| 47 | } // namespace std | ||
| 48 | |||
| 49 | namespace VideoCommon::Shader { | ||
| 50 | |||
| 51 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 52 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 53 | using SeparateSamplerMap = | ||
| 54 | std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; | ||
| 55 | using BindlessSamplerMap = | ||
| 56 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 57 | |||
| 58 | struct GraphicsInfo { | ||
| 59 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 60 | |||
| 61 | std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> | ||
| 62 | tfb_layouts{}; | ||
| 63 | std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; | ||
| 64 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 65 | Maxwell::TessellationPrimitive tessellation_primitive{}; | ||
| 66 | Maxwell::TessellationSpacing tessellation_spacing{}; | ||
| 67 | bool tfb_enabled = false; | ||
| 68 | bool tessellation_clockwise = false; | ||
| 69 | }; | ||
| 70 | static_assert(std::is_trivially_copyable_v<GraphicsInfo> && | ||
| 71 | std::is_standard_layout_v<GraphicsInfo>); | ||
| 72 | |||
| 73 | struct ComputeInfo { | ||
| 74 | std::array<u32, 3> workgroup_size{}; | ||
| 75 | u32 shared_memory_size_in_words = 0; | ||
| 76 | u32 local_memory_size_in_words = 0; | ||
| 77 | }; | ||
| 78 | static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); | ||
| 79 | |||
| 80 | struct SerializedRegistryInfo { | ||
| 81 | VideoCore::GuestDriverProfile guest_driver_profile; | ||
| 82 | u32 bound_buffer = 0; | ||
| 83 | GraphicsInfo graphics; | ||
| 84 | ComputeInfo compute; | ||
| 85 | }; | ||
| 86 | |||
| 87 | /** | ||
| 88 | * The Registry is a class used to interface the 3D and compute engines with the shader compiler. | ||
| 89 | * With it, the shader can obtain required data from GPU state and store it for disk shader | ||
| 90 | * compilation. | ||
| 91 | */ | ||
| 92 | class Registry { | ||
| 93 | public: | ||
| 94 | explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); | ||
| 95 | |||
| 96 | explicit Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 97 | Tegra::Engines::ConstBufferEngineInterface& engine_); | ||
| 98 | |||
| 99 | ~Registry(); | ||
| 100 | |||
| 101 | /// Retrieves a key from the registry. If it's registered, returns the registered value; | ||
| 102 | /// otherwise obtains it from Maxwell3D and registers it. | ||
| 103 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 104 | |||
| 105 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 106 | |||
| 107 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( | ||
| 108 | std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); | ||
| 109 | |||
| 110 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 111 | |||
| 112 | /// Inserts a key. | ||
| 113 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 114 | |||
| 115 | /// Inserts a bound sampler key. | ||
| 116 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 117 | |||
| 118 | /// Inserts a bindless sampler key. | ||
| 119 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 120 | |||
| 121 | /// Checks keys and samplers against the engine's current const buffers. | ||
| 122 | /// Returns true if they are the same value, false otherwise. | ||
| 123 | bool IsConsistent() const; | ||
| 124 | |||
| 125 | /// Returns true if the keys are equal to the other ones in the registry. | ||
| 126 | bool HasEqualKeys(const Registry& rhs) const; | ||
| 127 | |||
| 128 | /// Returns graphics information from this shader | ||
| 129 | const GraphicsInfo& GetGraphicsInfo() const; | ||
| 130 | |||
| 131 | /// Returns compute information from this shader | ||
| 132 | const ComputeInfo& GetComputeInfo() const; | ||
| 133 | |||
| 134 | /// Returns the const buffer keys in the database. | ||
| 135 | const KeyMap& GetKeys() const { | ||
| 136 | return keys; | ||
| 137 | } | ||
| 138 | |||
| 139 | /// Gets samplers database. | ||
| 140 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 141 | return bound_samplers; | ||
| 142 | } | ||
| 143 | |||
| 144 | /// Gets bindless samplers database. | ||
| 145 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 146 | return bindless_samplers; | ||
| 147 | } | ||
| 148 | |||
| 149 | /// Gets the bound buffer used by this shader | ||
| 150 | u32 GetBoundBuffer() const { | ||
| 151 | return bound_buffer; | ||
| 152 | } | ||
| 153 | |||
| 154 | /// Obtains access to the guest driver's profile. | ||
| 155 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { | ||
| 156 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; | ||
| 157 | } | ||
| 158 | |||
| 159 | private: | ||
| 160 | const Tegra::Engines::ShaderType stage; | ||
| 161 | VideoCore::GuestDriverProfile stored_guest_driver_profile; | ||
| 162 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 163 | KeyMap keys; | ||
| 164 | BoundSamplerMap bound_samplers; | ||
| 165 | SeparateSamplerMap separate_samplers; | ||
| 166 | BindlessSamplerMap bindless_samplers; | ||
| 167 | u32 bound_buffer; | ||
| 168 | GraphicsInfo graphics_info; | ||
| 169 | ComputeInfo compute_info; | ||
| 170 | }; | ||
| 171 | |||
| 172 | } // namespace VideoCommon::Shader | ||
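
Note on the specializations above: a minimal, self-contained sketch of how they make SeparateSamplerKey usable as an unordered_map key (simplified types; the int mapped value stands in for Tegra::Engines::SamplerDescriptor):

    // Sketch only, assuming C++17.
    #include <cstdint>
    #include <functional>
    #include <unordered_map>
    #include <utility>

    using u32 = std::uint32_t;

    struct SeparateSamplerKey {
        std::pair<u32, u32> buffers;
        std::pair<u32, u32> offsets;
    };

    namespace std {
    template <>
    struct hash<SeparateSamplerKey> {
        size_t operator()(const SeparateSamplerKey& key) const noexcept {
            // XOR-folding the four fields is cheap but commutative, so distinct
            // keys can collide; equal_to below still keeps lookups exact.
            return hash<u32>{}(key.buffers.first ^ key.buffers.second ^
                               key.offsets.first ^ key.offsets.second);
        }
    };
    template <>
    struct equal_to<SeparateSamplerKey> {
        bool operator()(const SeparateSamplerKey& lhs,
                        const SeparateSamplerKey& rhs) const noexcept {
            return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
        }
    };
    } // namespace std

    int main() {
        std::unordered_map<SeparateSamplerKey, int> map;
        map[{{1, 2}, {8, 16}}] = 42;
        return map.count({{1, 2}, {8, 16}}) == 1 ? 0 : 1;
    }
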
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null | |||
| @@ -1,464 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cmath> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/logging/log.h" | ||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/shader/node.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Attribute; | ||
| 21 | using Tegra::Shader::Instruction; | ||
| 22 | using Tegra::Shader::IpaMode; | ||
| 23 | using Tegra::Shader::Pred; | ||
| 24 | using Tegra::Shader::PredCondition; | ||
| 25 | using Tegra::Shader::PredOperation; | ||
| 26 | using Tegra::Shader::Register; | ||
| 27 | |||
| 28 | ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, | ||
| 29 | Registry& registry_) | ||
| 30 | : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ | ||
| 31 | registry_} { | ||
| 32 | Decode(); | ||
| 33 | PostDecode(); | ||
| 34 | } | ||
| 35 | |||
| 36 | ShaderIR::~ShaderIR() = default; | ||
| 37 | |||
| 38 | Node ShaderIR::GetRegister(Register reg) { | ||
| 39 | if (reg != Register::ZeroIndex) { | ||
| 40 | used_registers.insert(static_cast<u32>(reg)); | ||
| 41 | } | ||
| 42 | return MakeNode<GprNode>(reg); | ||
| 43 | } | ||
| 44 | |||
| 45 | Node ShaderIR::GetCustomVariable(u32 id) { | ||
| 46 | return MakeNode<CustomVarNode>(id); | ||
| 47 | } | ||
| 48 | |||
| 49 | Node ShaderIR::GetImmediate19(Instruction instr) { | ||
| 50 | return Immediate(instr.alu.GetImm20_19()); | ||
| 51 | } | ||
| 52 | |||
| 53 | Node ShaderIR::GetImmediate32(Instruction instr) { | ||
| 54 | return Immediate(instr.alu.GetImm20_32()); | ||
| 55 | } | ||
| 56 | |||
| 57 | Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { | ||
| 58 | const auto index = static_cast<u32>(index_); | ||
| 59 | const auto offset = static_cast<u32>(offset_); | ||
| 60 | |||
| 61 | used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); | ||
| 62 | |||
| 63 | return MakeNode<CbufNode>(index, Immediate(offset)); | ||
| 64 | } | ||
| 65 | |||
| 66 | Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | ||
| 67 | const auto index = static_cast<u32>(index_); | ||
| 68 | const auto offset = static_cast<u32>(offset_); | ||
| 69 | |||
| 70 | used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); | ||
| 71 | |||
| 72 | Node final_offset = [&] { | ||
| 73 | // Attempt to inline constant buffer without a variable offset. This is done to allow | ||
| 74 | // tracking LDC calls. | ||
| 75 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 76 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 77 | return Immediate(offset); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); | ||
| 81 | }(); | ||
| 82 | return MakeNode<CbufNode>(index, std::move(final_offset)); | ||
| 83 | } | ||
| 84 | |||
| 85 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { | ||
| 86 | const auto pred = static_cast<Pred>(pred_); | ||
| 87 | if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { | ||
| 88 | used_predicates.insert(pred); | ||
| 89 | } | ||
| 90 | |||
| 91 | return MakeNode<PredicateNode>(pred, negated); | ||
| 92 | } | ||
| 93 | |||
| 94 | Node ShaderIR::GetPredicate(bool immediate) { | ||
| 95 | return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); | ||
| 96 | } | ||
| 97 | |||
| 98 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { | ||
| 99 | MarkAttributeUsage(index, element); | ||
| 100 | used_input_attributes.emplace(index); | ||
| 101 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); | ||
| 102 | } | ||
| 103 | |||
| 104 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { | ||
| 105 | uses_physical_attributes = true; | ||
| 106 | return MakeNode<AbufNode>(GetRegister(physical_address), buffer); | ||
| 107 | } | ||
| 108 | |||
| 109 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | ||
| 110 | MarkAttributeUsage(index, element); | ||
| 111 | used_output_attributes.insert(index); | ||
| 112 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); | ||
| 113 | } | ||
| 114 | |||
| 115 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { | ||
| 116 | Node node = MakeNode<InternalFlagNode>(flag); | ||
| 117 | if (negated) { | ||
| 118 | return Operation(OperationCode::LogicalNegate, std::move(node)); | ||
| 119 | } | ||
| 120 | return node; | ||
| 121 | } | ||
| 122 | |||
| 123 | Node ShaderIR::GetLocalMemory(Node address) { | ||
| 124 | return MakeNode<LmemNode>(std::move(address)); | ||
| 125 | } | ||
| 126 | |||
| 127 | Node ShaderIR::GetSharedMemory(Node address) { | ||
| 128 | return MakeNode<SmemNode>(std::move(address)); | ||
| 129 | } | ||
| 130 | |||
| 131 | Node ShaderIR::GetTemporary(u32 id) { | ||
| 132 | return GetRegister(Register::ZeroIndex + 1 + id); | ||
| 133 | } | ||
| 134 | |||
| 135 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | ||
| 136 | if (absolute) { | ||
| 137 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); | ||
| 138 | } | ||
| 139 | if (negate) { | ||
| 140 | value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); | ||
| 141 | } | ||
| 142 | return value; | ||
| 143 | } | ||
| 144 | |||
| 145 | Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | ||
| 146 | if (!saturate) { | ||
| 147 | return value; | ||
| 148 | } | ||
| 149 | |||
| 150 | Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 151 | Node positive_one = Immediate(1.0f); | ||
| 152 | return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 153 | std::move(positive_one)); | ||
| 154 | } | ||
| 155 | |||
| 156 | Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { | ||
| 157 | switch (size) { | ||
| 158 | case Register::Size::Byte: | ||
| 159 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, | ||
| 160 | std::move(value), Immediate(24)); | ||
| 161 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, | ||
| 162 | std::move(value), Immediate(24)); | ||
| 163 | return value; | ||
| 164 | case Register::Size::Short: | ||
| 165 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, | ||
| 166 | std::move(value), Immediate(16)); | ||
| 167 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, | ||
| 168 | std::move(value), Immediate(16)); | ||
| 169 | return value; | ||
| 170 | case Register::Size::Word: | ||
| 171 | // Default - do nothing | ||
| 172 | return value; | ||
| 173 | default: | ||
| 174 | UNREACHABLE_MSG("Unimplemented conversion size: {}", size); | ||
| 175 | return value; | ||
| 176 | } | ||
| 177 | } | ||
| 178 | |||
| 179 | Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { | ||
| 180 | if (!is_signed) { | ||
| 181 | // Absolute value or negation of an unsigned value is pointless | ||
| 182 | return value; | ||
| 183 | } | ||
| 184 | if (absolute) { | ||
| 185 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); | ||
| 186 | } | ||
| 187 | if (negate) { | ||
| 188 | value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); | ||
| 189 | } | ||
| 190 | return value; | ||
| 191 | } | ||
| 192 | |||
| 193 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | ||
| 194 | Node value = Immediate(instr.half_imm.PackImmediates()); | ||
| 195 | if (!has_negation) { | ||
| 196 | return value; | ||
| 197 | } | ||
| 198 | |||
| 199 | Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | ||
| 200 | Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 201 | |||
| 202 | return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), | ||
| 203 | std::move(second_negate)); | ||
| 204 | } | ||
| 205 | |||
| 206 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | ||
| 207 | return Operation(OperationCode::HUnpack, type, std::move(value)); | ||
| 208 | } | ||
| 209 | |||
| 210 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | ||
| 211 | switch (merge) { | ||
| 212 | case Tegra::Shader::HalfMerge::H0_H1: | ||
| 213 | return src; | ||
| 214 | case Tegra::Shader::HalfMerge::F32: | ||
| 215 | return Operation(OperationCode::HMergeF32, std::move(src)); | ||
| 216 | case Tegra::Shader::HalfMerge::Mrg_H0: | ||
| 217 | return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); | ||
| 218 | case Tegra::Shader::HalfMerge::Mrg_H1: | ||
| 219 | return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); | ||
| 220 | } | ||
| 221 | UNREACHABLE(); | ||
| 222 | return src; | ||
| 223 | } | ||
| 224 | |||
| 225 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | ||
| 226 | if (absolute) { | ||
| 227 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); | ||
| 228 | } | ||
| 229 | if (negate) { | ||
| 230 | value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), | ||
| 231 | GetPredicate(true)); | ||
| 232 | } | ||
| 233 | return value; | ||
| 234 | } | ||
| 235 | |||
| 236 | Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | ||
| 237 | if (!saturate) { | ||
| 238 | return value; | ||
| 239 | } | ||
| 240 | |||
| 241 | Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 242 | Node positive_one = Immediate(1.0f); | ||
| 243 | return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 244 | std::move(positive_one)); | ||
| 245 | } | ||
| 246 | |||
| 247 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | ||
| 248 | if (condition == PredCondition::T) { | ||
| 249 | return GetPredicate(true); | ||
| 250 | } else if (condition == PredCondition::F) { | ||
| 251 | return GetPredicate(false); | ||
| 252 | } | ||
| 253 | |||
| 254 | static constexpr std::array comparison_table{ | ||
| 255 | OperationCode(0), | ||
| 256 | OperationCode::LogicalFOrdLessThan, // LT | ||
| 257 | OperationCode::LogicalFOrdEqual, // EQ | ||
| 258 | OperationCode::LogicalFOrdLessEqual, // LE | ||
| 259 | OperationCode::LogicalFOrdGreaterThan, // GT | ||
| 260 | OperationCode::LogicalFOrdNotEqual, // NE | ||
| 261 | OperationCode::LogicalFOrdGreaterEqual, // GE | ||
| 262 | OperationCode::LogicalFOrdered, // NUM | ||
| 263 | OperationCode::LogicalFUnordered, // NAN | ||
| 264 | OperationCode::LogicalFUnordLessThan, // LTU | ||
| 265 | OperationCode::LogicalFUnordEqual, // EQU | ||
| 266 | OperationCode::LogicalFUnordLessEqual, // LEU | ||
| 267 | OperationCode::LogicalFUnordGreaterThan, // GTU | ||
| 268 | OperationCode::LogicalFUnordNotEqual, // NEU | ||
| 269 | OperationCode::LogicalFUnordGreaterEqual, // GEU | ||
| 270 | }; | ||
| 271 | const std::size_t index = static_cast<std::size_t>(condition); | ||
| 272 | ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); | ||
| 273 | |||
| 274 | return Operation(comparison_table[index], op_a, op_b); | ||
| 275 | } | ||
| 276 | |||
| 277 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, | ||
| 278 | Node op_b) { | ||
| 279 | static constexpr std::array comparison_table{ | ||
| 280 | std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, | ||
| 281 | std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, | ||
| 282 | std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, | ||
| 283 | std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, | ||
| 284 | std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, | ||
| 285 | std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, | ||
| 286 | }; | ||
| 287 | |||
| 288 | const auto comparison = | ||
| 289 | std::find_if(comparison_table.cbegin(), comparison_table.cend(), | ||
| 290 | [condition](const auto entry) { return condition == entry.first; }); | ||
| 291 | UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), | ||
| 292 | "Unknown predicate comparison operation"); | ||
| 293 | |||
| 294 | return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), | ||
| 295 | std::move(op_b)); | ||
| 296 | } | ||
| 297 | |||
| 298 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, | ||
| 299 | Node op_b) { | ||
| 300 | static constexpr std::array comparison_table{ | ||
| 301 | std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, | ||
| 302 | std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, | ||
| 303 | std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, | ||
| 304 | std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, | ||
| 305 | std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, | ||
| 306 | std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, | ||
| 307 | std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, | ||
| 308 | std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, | ||
| 309 | std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, | ||
| 310 | std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, | ||
| 311 | std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, | ||
| 312 | }; | ||
| 313 | |||
| 314 | const auto comparison = | ||
| 315 | std::find_if(comparison_table.cbegin(), comparison_table.cend(), | ||
| 316 | [condition](const auto entry) { return condition == entry.first; }); | ||
| 317 | UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), | ||
| 318 | "Unknown predicate comparison operation"); | ||
| 319 | |||
| 320 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); | ||
| 321 | } | ||
| 322 | |||
| 323 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | ||
| 324 | static constexpr std::array operation_table{ | ||
| 325 | OperationCode::LogicalAnd, | ||
| 326 | OperationCode::LogicalOr, | ||
| 327 | OperationCode::LogicalXor, | ||
| 328 | }; | ||
| 329 | |||
| 330 | const auto index = static_cast<std::size_t>(operation); | ||
| 331 | if (index >= operation_table.size()) { | ||
| 332 | UNIMPLEMENTED_MSG("Unknown predicate operation."); | ||
| 333 | return {}; | ||
| 334 | } | ||
| 335 | |||
| 336 | return operation_table[index]; | ||
| 337 | } | ||
| 338 | |||
| 339 | Node ShaderIR::GetConditionCode(ConditionCode cc) const { | ||
| 340 | switch (cc) { | ||
| 341 | case ConditionCode::NEU: | ||
| 342 | return GetInternalFlag(InternalFlag::Zero, true); | ||
| 343 | case ConditionCode::FCSM_TR: | ||
| 344 | UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); | ||
| 345 | return MakeNode<PredicateNode>(Pred::NeverExecute, false); | ||
| 346 | default: | ||
| 347 | UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); | ||
| 348 | return MakeNode<PredicateNode>(Pred::NeverExecute, false); | ||
| 349 | } | ||
| 350 | } | ||
| 351 | |||
| 352 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { | ||
| 353 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); | ||
| 354 | } | ||
| 355 | |||
| 356 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { | ||
| 357 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); | ||
| 358 | } | ||
| 359 | |||
| 360 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { | ||
| 361 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); | ||
| 362 | } | ||
| 363 | |||
| 364 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { | ||
| 365 | bb.push_back( | ||
| 366 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); | ||
| 367 | } | ||
| 368 | |||
| 369 | void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { | ||
| 370 | bb.push_back( | ||
| 371 | Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); | ||
| 372 | } | ||
| 373 | |||
| 374 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { | ||
| 375 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); | ||
| 376 | } | ||
| 377 | |||
| 378 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { | ||
| 379 | if (!sets_cc) { | ||
| 380 | return; | ||
| 381 | } | ||
| 382 | Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); | ||
| 383 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); | ||
| 384 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 385 | } | ||
| 386 | |||
| 387 | void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { | ||
| 388 | if (!sets_cc) { | ||
| 389 | return; | ||
| 390 | } | ||
| 391 | Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); | ||
| 392 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); | ||
| 393 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 394 | } | ||
| 395 | |||
| 396 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | ||
| 397 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), | ||
| 398 | Immediate(offset), Immediate(bits)); | ||
| 399 | } | ||
| 400 | |||
| 401 | Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { | ||
| 402 | return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), | ||
| 403 | Immediate(bits)); | ||
| 404 | } | ||
| 405 | |||
| 406 | void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { | ||
| 407 | switch (index) { | ||
| 408 | case Attribute::Index::LayerViewportPointSize: | ||
| 409 | switch (element) { | ||
| 410 | case 0: | ||
| 411 | UNIMPLEMENTED(); | ||
| 412 | break; | ||
| 413 | case 1: | ||
| 414 | uses_layer = true; | ||
| 415 | break; | ||
| 416 | case 2: | ||
| 417 | uses_viewport_index = true; | ||
| 418 | break; | ||
| 419 | case 3: | ||
| 420 | uses_point_size = true; | ||
| 421 | break; | ||
| 422 | } | ||
| 423 | break; | ||
| 424 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 425 | switch (element) { | ||
| 426 | case 2: | ||
| 427 | uses_instance_id = true; | ||
| 428 | break; | ||
| 429 | case 3: | ||
| 430 | uses_vertex_id = true; | ||
| 431 | break; | ||
| 432 | } | ||
| 433 | break; | ||
| 434 | case Attribute::Index::ClipDistances0123: | ||
| 435 | case Attribute::Index::ClipDistances4567: { | ||
| 436 | const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; | ||
| 437 | used_clip_distances.at(clip_index) = true; | ||
| 438 | break; | ||
| 439 | } | ||
| 440 | case Attribute::Index::FrontColor: | ||
| 441 | case Attribute::Index::FrontSecondaryColor: | ||
| 442 | case Attribute::Index::BackColor: | ||
| 443 | case Attribute::Index::BackSecondaryColor: | ||
| 444 | uses_legacy_varyings = true; | ||
| 445 | break; | ||
| 446 | default: | ||
| 447 | if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { | ||
| 448 | uses_legacy_varyings = true; | ||
| 449 | } | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | } | ||
| 453 | |||
| 454 | std::size_t ShaderIR::DeclareAmend(Node new_amend) { | ||
| 455 | const auto id = amend_code.size(); | ||
| 456 | amend_code.push_back(std::move(new_amend)); | ||
| 457 | return id; | ||
| 458 | } | ||
| 459 | |||
| 460 | u32 ShaderIR::NewCustomVariable() { | ||
| 461 | return num_custom_variables++; | ||
| 462 | } | ||
| 463 | |||
| 464 | } // namespace VideoCommon::Shader | ||
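
Note on ConvertIntegerSize above: the left/right shift pair the IR emits is the classic sign- or zero-extension idiom. A standalone sketch of the equivalent scalar computation (illustrative only; assumes arithmetic right shift of signed values, guaranteed since C++20):

    #include <cassert>
    #include <cstdint>

    std::uint32_t ConvertIntegerSize(std::uint32_t value, unsigned bits, bool is_signed) {
        const unsigned shift = 32u - bits; // 24 for Byte, 16 for Short
        if (is_signed) {
            // IArithmeticShiftRight: replicates the sign bit of the low field.
            return static_cast<std::uint32_t>(
                static_cast<std::int32_t>(value << shift) >> shift);
        }
        // Unsigned variant: logical shift right zero-extends instead.
        return (value << shift) >> shift;
    }

    int main() {
        assert(ConvertIntegerSize(0xFFu, 8, true) == 0xFFFFFFFFu);    // s8 -1
        assert(ConvertIntegerSize(0xFFu, 8, false) == 0xFFu);         // u8 255
        assert(ConvertIntegerSize(0x8000u, 16, true) == 0xFFFF8000u); // s16 min
        return 0;
    }
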
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null | |||
| @@ -1,479 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <list> | ||
| 9 | #include <map> | ||
| 10 | #include <optional> | ||
| 11 | #include <set> | ||
| 12 | #include <tuple> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_bytecode.h" | ||
| 18 | #include "video_core/engines/shader_header.h" | ||
| 19 | #include "video_core/shader/ast.h" | ||
| 20 | #include "video_core/shader/compiler_settings.h" | ||
| 21 | #include "video_core/shader/memory_util.h" | ||
| 22 | #include "video_core/shader/node.h" | ||
| 23 | #include "video_core/shader/registry.h" | ||
| 24 | |||
| 25 | namespace VideoCommon::Shader { | ||
| 26 | |||
| 27 | struct ShaderBlock; | ||
| 28 | |||
| 29 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | ||
| 30 | |||
| 31 | struct ConstBuffer { | ||
| 32 | constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) | ||
| 33 | : max_offset{max_offset_}, is_indirect{is_indirect_} {} | ||
| 34 | |||
| 35 | constexpr ConstBuffer() = default; | ||
| 36 | |||
| 37 | void MarkAsUsed(u64 offset) { | ||
| 38 | max_offset = std::max(max_offset, static_cast<u32>(offset)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void MarkAsUsedIndirect() { | ||
| 42 | is_indirect = true; | ||
| 43 | } | ||
| 44 | |||
| 45 | bool IsIndirect() const { | ||
| 46 | return is_indirect; | ||
| 47 | } | ||
| 48 | |||
| 49 | u32 GetSize() const { | ||
| 50 | return max_offset + static_cast<u32>(sizeof(float)); | ||
| 51 | } | ||
| 52 | |||
| 53 | u32 GetMaxOffset() const { | ||
| 54 | return max_offset; | ||
| 55 | } | ||
| 56 | |||
| 57 | private: | ||
| 58 | u32 max_offset = 0; | ||
| 59 | bool is_indirect = false; | ||
| 60 | }; | ||
| 61 | |||
| 62 | struct GlobalMemoryUsage { | ||
| 63 | bool is_read{}; | ||
| 64 | bool is_written{}; | ||
| 65 | }; | ||
| 66 | |||
| 67 | class ShaderIR final { | ||
| 68 | public: | ||
| 69 | explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, | ||
| 70 | CompilerSettings settings_, Registry& registry_); | ||
| 71 | ~ShaderIR(); | ||
| 72 | |||
| 73 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | ||
| 74 | return basic_blocks; | ||
| 75 | } | ||
| 76 | |||
| 77 | const std::set<u32>& GetRegisters() const { | ||
| 78 | return used_registers; | ||
| 79 | } | ||
| 80 | |||
| 81 | const std::set<Tegra::Shader::Pred>& GetPredicates() const { | ||
| 82 | return used_predicates; | ||
| 83 | } | ||
| 84 | |||
| 85 | const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const { | ||
| 86 | return used_input_attributes; | ||
| 87 | } | ||
| 88 | |||
| 89 | const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { | ||
| 90 | return used_output_attributes; | ||
| 91 | } | ||
| 92 | |||
| 93 | const std::map<u32, ConstBuffer>& GetConstantBuffers() const { | ||
| 94 | return used_cbufs; | ||
| 95 | } | ||
| 96 | |||
| 97 | const std::list<SamplerEntry>& GetSamplers() const { | ||
| 98 | return used_samplers; | ||
| 99 | } | ||
| 100 | |||
| 101 | const std::list<ImageEntry>& GetImages() const { | ||
| 102 | return used_images; | ||
| 103 | } | ||
| 104 | |||
| 105 | const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() | ||
| 106 | const { | ||
| 107 | return used_clip_distances; | ||
| 108 | } | ||
| 109 | |||
| 110 | const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { | ||
| 111 | return used_global_memory; | ||
| 112 | } | ||
| 113 | |||
| 114 | std::size_t GetLength() const { | ||
| 115 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | ||
| 116 | } | ||
| 117 | |||
| 118 | bool UsesLayer() const { | ||
| 119 | return uses_layer; | ||
| 120 | } | ||
| 121 | |||
| 122 | bool UsesViewportIndex() const { | ||
| 123 | return uses_viewport_index; | ||
| 124 | } | ||
| 125 | |||
| 126 | bool UsesPointSize() const { | ||
| 127 | return uses_point_size; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool UsesInstanceId() const { | ||
| 131 | return uses_instance_id; | ||
| 132 | } | ||
| 133 | |||
| 134 | bool UsesVertexId() const { | ||
| 135 | return uses_vertex_id; | ||
| 136 | } | ||
| 137 | |||
| 138 | bool UsesLegacyVaryings() const { | ||
| 139 | return uses_legacy_varyings; | ||
| 140 | } | ||
| 141 | |||
| 142 | bool UsesYNegate() const { | ||
| 143 | return uses_y_negate; | ||
| 144 | } | ||
| 145 | |||
| 146 | bool UsesWarps() const { | ||
| 147 | return uses_warps; | ||
| 148 | } | ||
| 149 | |||
| 150 | bool HasPhysicalAttributes() const { | ||
| 151 | return uses_physical_attributes; | ||
| 152 | } | ||
| 153 | |||
| 154 | const Tegra::Shader::Header& GetHeader() const { | ||
| 155 | return header; | ||
| 156 | } | ||
| 157 | |||
| 158 | bool IsFlowStackDisabled() const { | ||
| 159 | return disable_flow_stack; | ||
| 160 | } | ||
| 161 | |||
| 162 | bool IsDecompiled() const { | ||
| 163 | return decompiled; | ||
| 164 | } | ||
| 165 | |||
| 166 | const ASTManager& GetASTManager() const { | ||
| 167 | return program_manager; | ||
| 168 | } | ||
| 169 | |||
| 170 | ASTNode GetASTProgram() const { | ||
| 171 | return program_manager.GetProgram(); | ||
| 172 | } | ||
| 173 | |||
| 174 | u32 GetASTNumVariables() const { | ||
| 175 | return program_manager.GetVariables(); | ||
| 176 | } | ||
| 177 | |||
| 178 | u32 ConvertAddressToNvidiaSpace(u32 address) const { | ||
| 179 | return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); | ||
| 180 | } | ||
| 181 | |||
| 182 | /// Returns a condition code evaluated from internal flags | ||
| 183 | Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; | ||
| 184 | |||
| 185 | const Node& GetAmendNode(std::size_t index) const { | ||
| 186 | return amend_code[index]; | ||
| 187 | } | ||
| 188 | |||
| 189 | u32 GetNumCustomVariables() const { | ||
| 190 | return num_custom_variables; | ||
| 191 | } | ||
| 192 | |||
| 193 | private: | ||
| 194 | friend class ASTDecoder; | ||
| 195 | |||
| 196 | struct SamplerInfo { | ||
| 197 | std::optional<Tegra::Shader::TextureType> type; | ||
| 198 | std::optional<bool> is_array; | ||
| 199 | std::optional<bool> is_shadow; | ||
| 200 | std::optional<bool> is_buffer; | ||
| 201 | |||
| 202 | constexpr bool IsComplete() const noexcept { | ||
| 203 | return type && is_array && is_shadow && is_buffer; | ||
| 204 | } | ||
| 205 | }; | ||
| 206 | |||
| 207 | void Decode(); | ||
| 208 | void PostDecode(); | ||
| 209 | |||
| 210 | NodeBlock DecodeRange(u32 begin, u32 end); | ||
| 211 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||
| 212 | void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); | ||
| 213 | |||
| 214 | /** | ||
| 215 | * Decodes a single instruction from Tegra to IR. | ||
| 216 | * @param bb Basic block where the nodes will be written to. | ||
| 217 | * @param pc Program counter. Offset to decode. | ||
| 218 | * @return Next address to decode. | ||
| 219 | */ | ||
| 220 | u32 DecodeInstr(NodeBlock& bb, u32 pc); | ||
| 221 | |||
| 222 | u32 DecodeArithmetic(NodeBlock& bb, u32 pc); | ||
| 223 | u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); | ||
| 224 | u32 DecodeBfe(NodeBlock& bb, u32 pc); | ||
| 225 | u32 DecodeBfi(NodeBlock& bb, u32 pc); | ||
| 226 | u32 DecodeShift(NodeBlock& bb, u32 pc); | ||
| 227 | u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); | ||
| 228 | u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); | ||
| 229 | u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); | ||
| 230 | u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); | ||
| 231 | u32 DecodeFfma(NodeBlock& bb, u32 pc); | ||
| 232 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); | ||
| 233 | u32 DecodeConversion(NodeBlock& bb, u32 pc); | ||
| 234 | u32 DecodeWarp(NodeBlock& bb, u32 pc); | ||
| 235 | u32 DecodeMemory(NodeBlock& bb, u32 pc); | ||
| 236 | u32 DecodeTexture(NodeBlock& bb, u32 pc); | ||
| 237 | u32 DecodeImage(NodeBlock& bb, u32 pc); | ||
| 238 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); | ||
| 239 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); | ||
| 240 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); | ||
| 241 | u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); | ||
| 242 | u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); | ||
| 243 | u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); | ||
| 244 | u32 DecodeFloatSet(NodeBlock& bb, u32 pc); | ||
| 245 | u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); | ||
| 246 | u32 DecodeHalfSet(NodeBlock& bb, u32 pc); | ||
| 247 | u32 DecodeVideo(NodeBlock& bb, u32 pc); | ||
| 248 | u32 DecodeXmad(NodeBlock& bb, u32 pc); | ||
| 249 | u32 DecodeOther(NodeBlock& bb, u32 pc); | ||
| 250 | |||
| 251 | /// Generates a node for a passed register. | ||
| 252 | Node GetRegister(Tegra::Shader::Register reg); | ||
| 253 | /// Generates a node for a custom variable | ||
| 254 | Node GetCustomVariable(u32 id); | ||
| 255 | /// Generates a node representing a 19-bit immediate value | ||
| 256 | Node GetImmediate19(Tegra::Shader::Instruction instr); | ||
| 257 | /// Generates a node representing a 32-bit immediate value | ||
| 258 | Node GetImmediate32(Tegra::Shader::Instruction instr); | ||
| 259 | /// Generates a node representing a constant buffer | ||
| 260 | Node GetConstBuffer(u64 index, u64 offset); | ||
| 261 | /// Generates a node representing a constant buffer with a variable (indirect) offset | ||
| 262 | Node GetConstBufferIndirect(u64 index, u64 offset, Node node); | ||
| 263 | /// Generates a node for a passed predicate. It can be optionally negated | ||
| 264 | Node GetPredicate(u64 pred, bool negated = false); | ||
| 265 | /// Generates a predicate node for an immediate true or false value | ||
| 266 | Node GetPredicate(bool immediate); | ||
| 267 | /// Generates a node representing an input attribute. Keeps track of used attributes. | ||
| 268 | Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); | ||
| 269 | /// Generates a node representing a physical input attribute. | ||
| 270 | Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); | ||
| 271 | /// Generates a node representing an output attribute. Keeps track of used attributes. | ||
| 272 | Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); | ||
| 273 | /// Generates a node representing an internal flag | ||
| 274 | Node GetInternalFlag(InternalFlag flag, bool negated = false) const; | ||
| 275 | /// Generates a node representing a local memory address | ||
| 276 | Node GetLocalMemory(Node address); | ||
| 277 | /// Generates a node representing a shared memory address | ||
| 278 | Node GetSharedMemory(Node address); | ||
| 279 | /// Generates a temporary; internally it uses a post-RZ register | ||
| 280 | Node GetTemporary(u32 id); | ||
| 281 | |||
| 282 | /// Sets a register. src value must be a number-evaluated node. | ||
| 283 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); | ||
| 284 | /// Sets a predicate. src value must be a bool-evaluated node | ||
| 285 | void SetPredicate(NodeBlock& bb, u64 dest, Node src); | ||
| 286 | /// Sets an internal flag. src value must be a bool-evaluated node | ||
| 287 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); | ||
| 288 | /// Sets a local memory address with a value. | ||
| 289 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); | ||
| 290 | /// Sets a shared memory address with a value. | ||
| 291 | void SetSharedMemory(NodeBlock& bb, Node address, Node value); | ||
| 292 | /// Sets a temporary. Internally it uses a post-RZ register | ||
| 293 | void SetTemporary(NodeBlock& bb, u32 id, Node value); | ||
| 294 | |||
| 295 | /// Sets internal flags from a float | ||
| 296 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); | ||
| 297 | /// Sets internal flags from an integer | ||
| 298 | void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); | ||
| 299 | |||
| 300 | /// Conditionally absolute/negated float. Absolute is applied first | ||
| 301 | Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); | ||
| 302 | /// Conditionally saturates a float | ||
| 303 | Node GetSaturatedFloat(Node value, bool saturate = true); | ||
| 304 | |||
| 305 | /// Converts an integer to a different size. | ||
| 306 | Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); | ||
| 307 | /// Conditionally absolute/negated integer. Absolute is applied first | ||
| 308 | Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); | ||
| 309 | |||
| 310 | /// Unpacks a half immediate from an instruction | ||
| 311 | Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); | ||
| 312 | /// Unpacks a binary value into a half float pair with a type format | ||
| 313 | Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); | ||
| 314 | /// Merges a half pair into another value | ||
| 315 | Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); | ||
| 316 | /// Conditionally absolute/negated half float pair. Absolute is applied first | ||
| 317 | Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); | ||
| 318 | /// Conditionally saturates a half float pair | ||
| 319 | Node GetSaturatedHalfFloat(Node value, bool saturate = true); | ||
| 320 | |||
| 321 | /// Get image component value by type and size | ||
| 322 | std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, | ||
| 323 | u32 component_size, Node original_value); | ||
| 324 | |||
| 325 | /// Returns a predicate comparing two floats | ||
| 326 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | ||
| 327 | /// Returns a predicate comparing two integers | ||
| 328 | Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, | ||
| 329 | Node op_a, Node op_b); | ||
| 330 | /// Returns a predicate comparing two half-float pairs; meta determines how the pairs are compared | ||
| 331 | Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | ||
| 332 | |||
| 333 | /// Returns a predicate combiner operation | ||
| 334 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); | ||
| 335 | |||
| 336 | /// Queries the missing sampler info from the execution context. | ||
| 337 | SamplerInfo GetSamplerInfo(SamplerInfo info, | ||
| 338 | std::optional<Tegra::Engines::SamplerDescriptor> sampler); | ||
| 339 | |||
| 340 | /// Accesses a texture sampler. | ||
| 341 | std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); | ||
| 342 | |||
| 343 | /// Accesses a texture sampler for a bindless texture. | ||
| 344 | std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, | ||
| 345 | Node& index_var); | ||
| 346 | |||
| 347 | /// Accesses an image. | ||
| 348 | ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | ||
| 349 | |||
| 350 | /// Access a bindless image sampler. | ||
| 351 | ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); | ||
| 352 | |||
| 353 | /// Extracts a sequence of bits from a node | ||
| 354 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | ||
| 355 | |||
| 356 | /// Inserts a sequence of bits into a node | ||
| 357 | Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); | ||
| 358 | |||
| 359 | /// Marks the usage of an input or output attribute. | ||
| 360 | void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); | ||
| 361 | |||
| 362 | /// Decodes VMNMX instruction and inserts its code into the passed basic block. | ||
| 363 | void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); | ||
| 364 | |||
| 365 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||
| 366 | const Node4& components); | ||
| 367 | |||
| 368 | void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||
| 369 | const Node4& components, bool ignore_mask = false); | ||
| 370 | void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||
| 371 | const Node4& components, bool ignore_mask = false); | ||
| 372 | |||
| 373 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 374 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||
| 375 | bool is_array, bool is_aoffi, | ||
| 376 | std::optional<Tegra::Shader::Register> bindless_reg); | ||
| 377 | |||
| 378 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 379 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||
| 380 | bool is_array); | ||
| 381 | |||
| 382 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 383 | bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, | ||
| 384 | bool is_bindless); | ||
| 385 | |||
| 386 | Node4 GetTldCode(Tegra::Shader::Instruction instr); | ||
| 387 | |||
| 388 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 389 | bool is_array); | ||
| 390 | |||
| 391 | std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( | ||
| 392 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, | ||
| 393 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | ||
| 394 | |||
| 395 | std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); | ||
| 396 | |||
| 397 | std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs); | ||
| 398 | |||
| 399 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 400 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 401 | Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, | ||
| 402 | std::optional<Tegra::Shader::Register> bindless_reg); | ||
| 403 | |||
| 404 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | ||
| 405 | u64 byte_height); | ||
| 406 | |||
| 407 | void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, | ||
| 408 | Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, | ||
| 409 | Tegra::Shader::PredicateResultMode predicate_mode, | ||
| 410 | Tegra::Shader::Pred predicate, bool sets_cc); | ||
| 411 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | ||
| 412 | Node op_c, Node imm_lut, bool sets_cc); | ||
| 413 | |||
| 414 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | ||
| 415 | |||
| 416 | std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 417 | s64 cursor); | ||
| 418 | |||
| 419 | std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, | ||
| 420 | const OperationNode& operation, | ||
| 421 | Node gpr, Node base_offset, | ||
| 422 | Node tracked, const NodeBlock& code, | ||
| 423 | s64 cursor); | ||
| 424 | |||
| 425 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | ||
| 426 | |||
| 427 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, | ||
| 428 | s64 cursor) const; | ||
| 429 | |||
| 430 | std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, | ||
| 431 | Tegra::Shader::Instruction instr, | ||
| 432 | bool is_read, bool is_write); | ||
| 433 | |||
| 434 | /// Registers new amending code and obtains the reference id. | ||
| 435 | std::size_t DeclareAmend(Node new_amend); | ||
| 436 | |||
| 437 | u32 NewCustomVariable(); | ||
| 438 | |||
| 439 | const ProgramCode& program_code; | ||
| 440 | const u32 main_offset; | ||
| 441 | const CompilerSettings settings; | ||
| 442 | Registry& registry; | ||
| 443 | |||
| 444 | bool decompiled{}; | ||
| 445 | bool disable_flow_stack{}; | ||
| 446 | |||
| 447 | u32 coverage_begin{}; | ||
| 448 | u32 coverage_end{}; | ||
| 449 | |||
| 450 | std::map<u32, NodeBlock> basic_blocks; | ||
| 451 | NodeBlock global_code; | ||
| 452 | ASTManager program_manager{true, true}; | ||
| 453 | std::vector<Node> amend_code; | ||
| 454 | u32 num_custom_variables{}; | ||
| 455 | |||
| 456 | std::set<u32> used_registers; | ||
| 457 | std::set<Tegra::Shader::Pred> used_predicates; | ||
| 458 | std::set<Tegra::Shader::Attribute::Index> used_input_attributes; | ||
| 459 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; | ||
| 460 | std::map<u32, ConstBuffer> used_cbufs; | ||
| 461 | std::list<SamplerEntry> used_samplers; | ||
| 462 | std::list<ImageEntry> used_images; | ||
| 463 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | ||
| 464 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | ||
| 465 | bool uses_layer{}; | ||
| 466 | bool uses_viewport_index{}; | ||
| 467 | bool uses_point_size{}; | ||
| 468 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | ||
| 469 | bool uses_instance_id{}; | ||
| 470 | bool uses_vertex_id{}; | ||
| 471 | bool uses_legacy_varyings{}; | ||
| 472 | bool uses_y_negate{}; | ||
| 473 | bool uses_warps{}; | ||
| 474 | bool uses_indexed_samplers{}; | ||
| 475 | |||
| 476 | Tegra::Shader::Header header; | ||
| 477 | }; | ||
| 478 | |||
| 479 | } // namespace VideoCommon::Shader | ||
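
Note on SamplerInfo above: the std::optional fields let partially known sampler metadata accumulate until IsComplete() holds. A simplified sketch; the Merge helper is hypothetical, since GetSamplerInfo's actual merging logic is not part of this diff:

    #include <optional>

    struct SamplerInfo {
        std::optional<bool> is_array;
        std::optional<bool> is_shadow;

        constexpr bool IsComplete() const noexcept {
            return is_array.has_value() && is_shadow.has_value();
        }
    };

    // Hypothetical helper: fill in only the still-unknown fields, keeping
    // whatever was already decoded from the instruction stream.
    SamplerInfo Merge(SamplerInfo info, const SamplerInfo& from_registry) {
        if (!info.is_array) {
            info.is_array = from_registry.is_array;
        }
        if (!info.is_shadow) {
            info.is_shadow = from_registry.is_shadow;
        }
        return info;
    }

    int main() {
        const SamplerInfo partial{std::nullopt, false};
        const SamplerInfo registry_answer{true, true};
        const SamplerInfo merged = Merge(partial, registry_answer);
        // is_array is taken from the registry; the decoded is_shadow answer is kept.
        return merged.IsComplete() && *merged.is_array && !*merged.is_shadow ? 0 : 1;
    }
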
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null | |||
| @@ -1,236 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <variant> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/node.h" | ||
| 11 | #include "video_core/shader/node_helper.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | ||
| 19 | OperationCode operation_code) { | ||
| 20 | for (; cursor >= 0; --cursor) { | ||
| 21 | Node node = code.at(cursor); | ||
| 22 | |||
| 23 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 24 | if (operation->GetCode() == operation_code) { | ||
| 25 | return {std::move(node), cursor}; | ||
| 26 | } | ||
| 27 | } | ||
| 28 | |||
| 29 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 30 | const auto& conditional_code = conditional->GetCode(); | ||
| 31 | auto result = FindOperation( | ||
| 32 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); | ||
| 33 | auto& found = result.first; | ||
| 34 | if (found) { | ||
| 35 | return {std::move(found), cursor}; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | } | ||
| 39 | return {}; | ||
| 40 | } | ||
| 41 | |||
| 42 | std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { | ||
| 43 | if (operation.GetCode() != OperationCode::UAdd) { | ||
| 44 | return std::nullopt; | ||
| 45 | } | ||
| 46 | Node gpr; | ||
| 47 | Node offset; | ||
| 48 | ASSERT(operation.GetOperandsCount() == 2); | ||
| 49 | for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { | ||
| 50 | Node operand = operation[i]; | ||
| 51 | if (std::holds_alternative<ImmediateNode>(*operand)) { | ||
| 52 | offset = operation[i]; | ||
| 53 | } else if (std::holds_alternative<GprNode>(*operand)) { | ||
| 54 | gpr = operation[i]; | ||
| 55 | } | ||
| 56 | } | ||
| 57 | if (offset && gpr) { | ||
| 58 | return std::make_pair(gpr, offset); | ||
| 59 | } | ||
| 60 | return std::nullopt; | ||
| 61 | } | ||
| 62 | |||
| 63 | bool AmendNodeCv(std::size_t amend_index, Node node) { | ||
| 64 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 65 | operation->SetAmendIndex(amend_index); | ||
| 66 | return true; | ||
| 67 | } | ||
| 68 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 69 | conditional->SetAmendIndex(amend_index); | ||
| 70 | return true; | ||
| 71 | } | ||
| 72 | return false; | ||
| 73 | } | ||
| 74 | |||
| 75 | } // Anonymous namespace | ||
| 76 | |||
| 77 | std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 78 | s64 cursor) { | ||
| 79 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||
| 80 | const u32 cbuf_index = cbuf->GetIndex(); | ||
| 81 | |||
| 82 | // Constant buffer found, test if it's an immediate | ||
| 83 | const auto& offset = cbuf->GetOffset(); | ||
| 84 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 85 | auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); | ||
| 86 | return {tracked, track}; | ||
| 87 | } | ||
| 88 | if (const auto operation = std::get_if<OperationNode>(&*offset)) { | ||
| 89 | const u32 bound_buffer = registry.GetBoundBuffer(); | ||
| 90 | if (bound_buffer != cbuf_index) { | ||
| 91 | return {}; | ||
| 92 | } | ||
| 93 | if (const std::optional pair = DecoupleIndirectRead(*operation)) { | ||
| 94 | auto [gpr, base_offset] = *pair; | ||
| 95 | return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, | ||
| 96 | code, cursor); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | ||
| 102 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 103 | return {}; | ||
| 104 | } | ||
| 105 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same | ||
| 106 | // register that it uses as an operand | ||
| 107 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 108 | if (!source) { | ||
| 109 | return {}; | ||
| 110 | } | ||
| 111 | return TrackBindlessSampler(source, code, new_cursor); | ||
| 112 | } | ||
| 113 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | ||
| 114 | const OperationNode& op = *operation; | ||
| 115 | |||
| 116 | const OperationCode opcode = operation->GetCode(); | ||
| 117 | if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { | ||
| 118 | ASSERT(op.GetOperandsCount() == 2); | ||
| 119 | auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); | ||
| 120 | auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); | ||
| 121 | if (node_a && node_b) { | ||
| 122 | auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, | ||
| 123 | std::pair{offset_a, offset_b}); | ||
| 124 | return {tracked, std::move(track)}; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | std::size_t i = op.GetOperandsCount(); | ||
| 128 | while (i--) { | ||
| 129 | if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) { | ||
| 130 | // Constant buffer found in operand. | ||
| 131 | return found; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | return {}; | ||
| 135 | } | ||
| 136 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | ||
| 137 | const auto& conditional_code = conditional->GetCode(); | ||
| 138 | return TrackBindlessSampler(tracked, conditional_code, | ||
| 139 | static_cast<s64>(conditional_code.size())); | ||
| 140 | } | ||
| 141 | return {}; | ||
| 142 | } | ||
| 143 | |||
| 144 | std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( | ||
| 145 | const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, | ||
| 146 | const NodeBlock& code, s64 cursor) { | ||
| 147 | const auto offset_imm = std::get<ImmediateNode>(*base_offset); | ||
| 148 | const auto& gpu_driver = registry.AccessGuestDriverProfile(); | ||
| 149 | const u32 bindless_cv = NewCustomVariable(); | ||
| 150 | const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); | ||
| 151 | Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); | ||
| 152 | |||
| 153 | Node cv_node = GetCustomVariable(bindless_cv); | ||
| 154 | Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); | ||
| 155 | const std::size_t amend_index = DeclareAmend(std::move(amend_op)); | ||
| 156 | AmendNodeCv(amend_index, code[cursor]); | ||
| 157 | |||
| 158 | // TODO: Implement bindless index custom variable | ||
| 159 | auto track = | ||
| 160 | MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); | ||
| 161 | return {tracked, track}; | ||
| 162 | } | ||
| 163 | |||
| 164 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, | ||
| 165 | s64 cursor) const { | ||
| 166 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||
| 167 | // Constant buffer found, test if it's an immediate | ||
| 168 | const auto& offset = cbuf->GetOffset(); | ||
| 169 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 170 | return {tracked, cbuf->GetIndex(), immediate->GetValue()}; | ||
| 171 | } | ||
| 172 | return {}; | ||
| 173 | } | ||
| 174 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | ||
| 175 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 176 | return {}; | ||
| 177 | } | ||
| 178 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same | ||
| 179 | // register that it uses as an operand | ||
| 180 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 181 | if (!source) { | ||
| 182 | return {}; | ||
| 183 | } | ||
| 184 | return TrackCbuf(source, code, new_cursor); | ||
| 185 | } | ||
| 186 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | ||
| 187 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { | ||
| 188 | if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { | ||
| 189 | // Cbuf found in operand. | ||
| 190 | return found; | ||
| 191 | } | ||
| 192 | } | ||
| 193 | return {}; | ||
| 194 | } | ||
| 195 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | ||
| 196 | const auto& conditional_code = conditional->GetCode(); | ||
| 197 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); | ||
| 198 | } | ||
| 199 | return {}; | ||
| 200 | } | ||
| 201 | |||
| 202 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { | ||
| 203 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same register | ||
| 204 | // that it uses as an operand | ||
| 205 | const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); | ||
| 206 | const auto& found = result.first; | ||
| 207 | if (!found) { | ||
| 208 | return std::nullopt; | ||
| 209 | } | ||
| 210 | if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { | ||
| 211 | return immediate->GetValue(); | ||
| 212 | } | ||
| 213 | return std::nullopt; | ||
| 214 | } | ||
| 215 | |||
| 216 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, | ||
| 217 | s64 cursor) const { | ||
| 218 | for (; cursor >= 0; --cursor) { | ||
| 219 | const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); | ||
| 220 | if (!found_node) { | ||
| 221 | return {}; | ||
| 222 | } | ||
| 223 | const auto operation = std::get_if<OperationNode>(&*found_node); | ||
| 224 | ASSERT(operation); | ||
| 225 | |||
| 226 | const auto& target = (*operation)[0]; | ||
| 227 | if (const auto gpr_target = std::get_if<GprNode>(&*target)) { | ||
| 228 | if (gpr_target->GetIndex() == tracked->GetIndex()) { | ||
| 229 | return {(*operation)[1], new_cursor}; | ||
| 230 | } | ||
| 231 | } | ||
| 232 | } | ||
| 233 | return {}; | ||
| 234 | } | ||
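TrackRegister scans the block backwards for the newest Assign whose target is the tracked GPR and yields its right-hand side, which is what lets TrackCbuf and TrackImmediate continue from the defining expression. A minimal, self-contained model of that backward scan over a flat instruction list (the Instr type is a hypothetical simplification):

    #include <cstdint>
    #include <optional>
    #include <vector>

    struct Instr {
        std::uint32_t dest;  // register written by this assignment
        std::uint32_t value; // stand-in for the assigned expression
    };

    // Walk backwards from 'cursor' to find the last write to 'reg'.
    // Callers pass cursor - 1, mirroring the "reduce the cursor by one"
    // comments above, so an instruction never matches its own write.
    std::optional<std::uint32_t> TrackRegisterSketch(const std::vector<Instr>& code,
                                                     std::int64_t cursor, std::uint32_t reg) {
        for (; cursor >= 0; --cursor) {
            const Instr& instr = code[static_cast<std::size_t>(cursor)];
            if (instr.dest == reg) {
                return instr.value;
            }
        }
        return std::nullopt; // no defining write in this block
    }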
| 235 | |||
| 236 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/shader/registry.h" | ||
| 13 | #include "video_core/shader/transform_feedback.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 20 | |||
| 21 | // TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 | ||
| 22 | |||
| 23 | /// Attribute offsets that describe a vector | ||
| 24 | constexpr std::array VECTORS = { | ||
| 25 | 28, // gl_Position | ||
| 26 | 32, // Generic 0 | ||
| 27 | 36, // Generic 1 | ||
| 28 | 40, // Generic 2 | ||
| 29 | 44, // Generic 3 | ||
| 30 | 48, // Generic 4 | ||
| 31 | 52, // Generic 5 | ||
| 32 | 56, // Generic 6 | ||
| 33 | 60, // Generic 7 | ||
| 34 | 64, // Generic 8 | ||
| 35 | 68, // Generic 9 | ||
| 36 | 72, // Generic 10 | ||
| 37 | 76, // Generic 11 | ||
| 38 | 80, // Generic 12 | ||
| 39 | 84, // Generic 13 | ||
| 40 | 88, // Generic 14 | ||
| 41 | 92, // Generic 15 | ||
| 42 | 96, // Generic 16 | ||
| 43 | 100, // Generic 17 | ||
| 44 | 104, // Generic 18 | ||
| 45 | 108, // Generic 19 | ||
| 46 | 112, // Generic 20 | ||
| 47 | 116, // Generic 21 | ||
| 48 | 120, // Generic 22 | ||
| 49 | 124, // Generic 23 | ||
| 50 | 128, // Generic 24 | ||
| 51 | 132, // Generic 25 | ||
| 52 | 136, // Generic 26 | ||
| 53 | 140, // Generic 27 | ||
| 54 | 144, // Generic 28 | ||
| 55 | 148, // Generic 29 | ||
| 56 | 152, // Generic 30 | ||
| 57 | 156, // Generic 31 | ||
| 58 | 160, // gl_FrontColor | ||
| 59 | 164, // gl_FrontSecondaryColor | ||
| 60 | 160, // gl_BackColor | ||
| 61 | 164, // gl_BackSecondaryColor | ||
| 62 | 192, // gl_TexCoord[0] | ||
| 63 | 196, // gl_TexCoord[1] | ||
| 64 | 200, // gl_TexCoord[2] | ||
| 65 | 204, // gl_TexCoord[3] | ||
| 66 | 208, // gl_TexCoord[4] | ||
| 67 | 212, // gl_TexCoord[5] | ||
| 68 | 216, // gl_TexCoord[6] | ||
| 69 | 220, // gl_TexCoord[7] | ||
| 70 | }; | ||
| 71 | } // namespace | ||
| 72 | |||
| 73 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { | ||
| 74 | |||
| 75 | std::unordered_map<u8, VaryingTFB> tfb; | ||
| 76 | |||
| 77 | for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { | ||
| 78 | const auto& locations = info.tfb_varying_locs[buffer]; | ||
| 79 | const auto& layout = info.tfb_layouts[buffer]; | ||
| 80 | const std::size_t varying_count = layout.varying_count; | ||
| 81 | |||
| 82 | std::size_t highest = 0; | ||
| 83 | |||
| 84 | for (std::size_t offset = 0; offset < varying_count; ++offset) { | ||
| 85 | const std::size_t base_offset = offset; | ||
| 86 | const u8 location = locations[offset]; | ||
| 87 | |||
| 88 | VaryingTFB varying; | ||
| 89 | varying.buffer = layout.stream; | ||
| 90 | varying.stride = layout.stride; | ||
| 91 | varying.offset = offset * sizeof(u32); | ||
| 92 | varying.components = 1; | ||
| 93 | |||
| 94 | if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { | ||
| 95 | UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); | ||
| 96 | |||
| 97 | const u8 base_index = location / 4; | ||
| 98 | while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { | ||
| 99 | ++offset; | ||
| 100 | ++varying.components; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; | ||
| 105 | UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); | ||
| 106 | |||
| 107 | highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); | ||
| 108 | } | ||
| 109 | |||
| 110 | UNIMPLEMENTED_IF(highest != layout.stride); | ||
| 111 | } | ||
| 112 | return tfb; | ||
| 113 | } | ||
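The inner loop above folds consecutive scalar varyings that belong to the same attribute (same location / 4) into a single entry with more components, which is how the four words of an attribute become one four-component varying. A standalone sketch of that folding with hypothetical location values (the VECTORS membership test is omitted for brevity):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        // Hypothetical TFB locations: the x, y, z and w words of generic
        // attribute 8 (locations 32..35), followed by a lone scalar at 48.
        const std::vector<std::uint8_t> locations{32, 33, 34, 35, 48};
        for (std::size_t offset = 0; offset < locations.size(); ++offset) {
            const std::uint8_t base_index = locations[offset] / 4;
            std::uint32_t components = 1;
            // Merge following locations that decode to the same attribute.
            while (offset + 1 < locations.size() && locations[offset + 1] / 4 == base_index) {
                ++offset;
                ++components;
            }
            std::cout << "attribute " << static_cast<unsigned>(base_index) << ": "
                      << components << " component(s)\n";
        }
        // Prints: attribute 8: 4 component(s), then attribute 12: 1 component(s)
    }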
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null | |||
| @@ -1,23 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/registry.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | struct VaryingTFB { | ||
| 15 | std::size_t buffer; | ||
| 16 | std::size_t stride; | ||
| 17 | std::size_t offset; | ||
| 18 | std::size_t components; | ||
| 19 | }; | ||
| 20 | |||
| 21 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); | ||
| 22 | |||
| 23 | } // namespace VideoCommon::Shader | ||
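For context, a hedged sketch of how a consumer might use the map this header declares, keyed by attribute location; the struct is copied locally so the sketch is self-contained, and the layout values are invented:

    #include <cstdint>
    #include <iostream>
    #include <unordered_map>

    // Local copy of the VaryingTFB struct declared above.
    struct VaryingTFB {
        std::size_t buffer;
        std::size_t stride;
        std::size_t offset;
        std::size_t components;
    };

    int main() {
        std::unordered_map<std::uint8_t, VaryingTFB> tfb;
        // Invented layout: location 32 streams 4 components from byte 0 of
        // buffer 0 with a 16-byte stride.
        tfb.emplace(32, VaryingTFB{0, 16, 0, 4});
        if (const auto it = tfb.find(32); it != tfb.end()) {
            const VaryingTFB& v = it->second;
            // A backend would translate this into a transform feedback varying
            // declaration for its API.
            std::cout << "buffer " << v.buffer << ", stride " << v.stride << ", offset "
                      << v.offset << ", components " << v.components << '\n';
        }
    }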